#!/bin/sh

## preamble ##################################################################

# Locate the library directory relative to this script's own location and
# source the shared shell helpers from it.
ch_lib=$(cd "$(dirname "$0")" && pwd)/../lib
. "${ch_lib}/base.sh"
# Search $ch_bin before the rest of $PATH. $ch_bin is not assigned in this
# file; presumably base.sh provides it (TODO confirm).
PATH=${ch_bin}:$PATH

# Help text. The here-document is unquoted, so $(basename "$0") expands to the
# name this script was invoked as. The glob table must stay in sync with the
# inference rules in fmt_validate().
usage=$(cat <<EOF
Convert an image from one format to another.

Usage:

  $ $(basename "$0") [-i FMT] [-o FMT] [OPTION ...] IN OUT

FMT is usually inferred and need not be specified. The first matching glob
below yields the inferred format. Paths need not exist in the filesystem.

  *.sqfs *.squash *.squashfs *.squishy                        squash
  *.tar *.t?z *.tar.? *.tar.??                                tar
  /* ./* (i.e., absolute path or relative with explicit dot)  dir

If no glob matches, then the inferred format is the first installed of
ch-image, podman, and docker. If none are installed, there is no inference.

Example:

  $ ch-convert foo/bar:latest /var/tmp/foobar.sqfs
  input:   ch-image  foo/bar:latest
  output:  squashfs  /var/tmp/foobar.sqfs
  copying ...
  done
EOF
)


## conversion functions ######################################################

# These thirty functions (mostly short; one per ordered pair of distinct
# formats) are dispatched at the end of the script based on their names, but
# are also usable as components of other conversion functions (in which case
# the caller must ensure intermediate outputs do not exist). All take two
# parameters: $1 is the input descriptor and $2 is the output descriptor.

# Convert ch-image storage image $1 to image directory $2.
cv_chimage_dir () {
    chimage_in_validate "$1"
    dir_out_validate "$2"
    # Resolve the storage path explicitly, as cv_chimage_squash and
    # cv_chimage_tar do, rather than depending on whatever value of $img an
    # earlier validation call happened to leave behind.
    img=$(chimage_path "$1")
    INFO 'exporting ...'
    # Use tar(1) instead of cp(1) because (1) it supports file exclusions and
    # (2) it allows a progress meter (issue #1332). A very quick, very dirty,
    # completely half-assed performance test suggests that's about the same.
    dir_make "$2"
    # $tar_xattr_args is deliberately unquoted: it holds zero or more options.
    # shellcheck disable=SC2086
    tar_ "$img" | quiet tar xf - $tar_xattr_args -pC "$2"
    dir_fixup "$2"
}

# Convert ch-image storage image $1 to Docker image $2. Both descriptors are
# validated first; the work is delegated to chimage_to_dockman.
cv_chimage_docker () {
    chimage_in_validate "$1"
    docker_out_validate "$2"
    chimage_to_dockman docker_ "$1" "$2"
}

# Convert ch-image storage image $1 to Podman image $2. Both descriptors are
# validated first; the work is delegated to chimage_to_dockman.
cv_chimage_podman () {
    chimage_in_validate "$1"
    podman_out_validate "$2"
    chimage_to_dockman podman_ "$1" "$2"
}

# Convert ch-image storage image $1 to SquashFS archive $2 by packing the
# image's directory in storage with cv_dir_squash.
cv_chimage_squash () {
    chimage_in_validate "$1"
    squash_out_validate "$2"
    # Path of the image's directory inside ch-image storage (global $img).
    img=$(chimage_path "$1")
    cv_dir_squash "$img" "$2"
}

# Convert ch-image storage image $1 to gzipped tarball $2.
cv_chimage_tar () {
    chimage_in_validate "$1"
    tar_out_validate "$2"
    # Path of the image's directory inside ch-image storage (global $img).
    img=$(chimage_path "$1")
    INFO 'exporting ...'
    # tar_ writes the archive to stdout; gzip_ compresses it into $2.
    tar_ "$img" | gzip_ > "$2"
}

# Convert image directory $1 to ch-image storage image $2 using
# “ch-image import”.
cv_dir_chimage () {
    dir_in_validate "$1"
    chimage_out_validate "$2"
    INFO 'importing ...'
    # “ch-image” recognizes “-q” as an argument, but we choose to quiet it with
    # “quiet” instead here because doing so is simpler from the perspective of
    # “ch-convert”.
    quiet ch-image import "$1" "$2"  # FIXME: no progress meter
}

# Convert image directory $1 to Docker image $2. Both descriptors are
# validated first; the work is delegated to dir_to_dockman.
cv_dir_docker () {
    dir_in_validate "$1"
    docker_out_validate "$2"
    dir_to_dockman docker_ "$1" "$2"
}

# Convert image directory $1 to Podman image $2. Both descriptors are
# validated first; the work is delegated to dir_to_dockman.
cv_dir_podman () {
    dir_in_validate "$1"
    podman_out_validate "$2"
    dir_to_dockman podman_ "$1" "$2"
}

# Pack image directory $1 into SquashFS archive $2. Mount points missing from
# $1 are added to the archive as mksquashfs(1) pseudo files (listed in a
# scratch file under $tmpdir) rather than being created in $1 itself.
cv_dir_squash () {
    # FIXME: mksquashfs(1) is incredibly noisy. This can be mitigated with
    # -quiet, but that's not available until version 4.4 (2019).
    dir_in_validate "$1"
    squash_out_validate "$2"
    pflist=${tmpdir}/pseudofiles
    INFO 'packing ...'
    quiet touch "$pflist"
    mount_points_ensure "$1" "$pflist"
    # Pass -quiet only if this mksquashfs(1) advertises it in its help text.
    q=
    if mksquashfs --help 2>&1 | grep -qE '^-quiet'; then
        q=-quiet
    fi
    # Exclude build cache metadata. 64kiB block size based on Shane’s
    # experiments. $squash_xattr_arg and $q are deliberately unquoted so that
    # they disappear from the command line when empty.
    # shellcheck disable=SC2086
    quiet mksquashfs "$1" "$2" $squash_xattr_arg -b 65536 -noappend -all-root \
          -pf "$pflist" -e "$1"/ch/git -e "$1"/ch/git.pickle $q

    # Zero the archive’s internal modification time at bytes 8–11, 0-indexed
    # [1]. Newer SquashFS-Tools ≥4.3 have option “-fstime 0” to do this, but
    # CentOS 7 comes with 4.2.  [1]: https://dr-emann.github.io/squashfs/
    #
    # Use octal escapes here: POSIX printf(1) leaves “\x” unspecified, and a
    # /bin/sh whose printf does not implement it would write the four literal
    # characters “\x00” into the header instead of four NUL bytes.
    printf '\000\000\000\000' | quiet dd of="$2" bs=1 count=4 seek=8 conv=notrunc status=none
    quiet rm "$pflist"
}

# Pack image directory $1 into gzipped tarball $2.
cv_dir_tar () {
    dir_in_validate "$1"
    tar_out_validate "$2"
    # Don't add essential files & directories because that will happen later
    # when converted to dir or squash.
    INFO 'packing ...'
    # tar_ writes the archive to stdout; gzip_ compresses it into $2.
    tar_ "$1" | gzip_ > "$2"
}

# Convert Docker image $1 to ch-image storage image $2. Both descriptors are
# validated first; the work is delegated to dockman_to_chimage.
cv_docker_chimage () {
   dockman_in_validate docker_ "$1"
   chimage_out_validate "$2"
   dockman_to_chimage docker_ "$1" "$2"
}

# Convert Docker image $1 to image directory $2. Both descriptors are
# validated first; the work is delegated to dockman_to_dir.
cv_docker_dir () {
    dockman_in_validate docker_ "$1"
    dir_out_validate "$2"
    dockman_to_dir docker_ "$1" "$2"
}

# Convert Docker image $1 to Podman image $2 by way of a temporary gzipped
# tarball under $tmpdir, which is removed afterwards.
cv_docker_podman () {
    dockman_in_validate docker_ "$1"
    podman_out_validate "$2"
    # Intermediate tarball: exported from Docker, then imported into Podman.
    docker_out=${tmpdir}/weirdal.tar.gz
    cv_docker_tar "$1" "$docker_out" # FIXME: needlessly compresses
    cv_tar_podman "$docker_out" "$2"
    quiet rm "$docker_out"
}

# Convert Docker image $1 to SquashFS archive $2. Both descriptors are
# validated first; the work is delegated to dockman_to_squash.
cv_docker_squash () {
    dockman_in_validate docker_ "$1"
    squash_out_validate "$2"
    dockman_to_squash docker_ "$1" "$2"
}

# Convert Docker image $1 to gzipped tarball $2. Both descriptors are
# validated first; the work is delegated to dockman_to_tar.
cv_docker_tar () {
    dockman_in_validate docker_ "$1"
    tar_out_validate "$2"
    dockman_to_tar docker_ "$1" "$2"
}

# Convert Podman image $1 to ch-image storage image $2. Both descriptors are
# validated first; the work is delegated to dockman_to_chimage.
cv_podman_chimage () {
    dockman_in_validate podman_ "$1"
    chimage_out_validate "$2"
    dockman_to_chimage podman_ "$1" "$2"
}

# Convert Podman image $1 to image directory $2. Both descriptors are
# validated first; the work is delegated to dockman_to_dir.
cv_podman_dir () {
    dockman_in_validate podman_ "$1"
    dir_out_validate "$2"
    dockman_to_dir podman_ "$1" "$2"
}

# Convert Podman image $1 to Docker image $2 by way of a temporary gzipped
# tarball under $tmpdir, which is removed afterwards.
cv_podman_docker () {
    dockman_in_validate podman_ "$1"
    docker_out_validate "$2"
    # Intermediate tarball: exported from Podman, then imported into Docker.
    podman_out=${tmpdir}/weirdal.tar.gz
    cv_podman_tar "$1" "$podman_out" # FIXME: needlessly compresses
    cv_tar_docker "$podman_out" "$2"
    quiet rm "$podman_out"
}

# Convert Podman image $1 to SquashFS archive $2. Both descriptors are
# validated first; the work is delegated to dockman_to_squash.
cv_podman_squash () {
    dockman_in_validate podman_ "$1"
    squash_out_validate "$2"
    dockman_to_squash podman_ "$1" "$2"
}

# Convert Podman image $1 to gzipped tarball $2. Both descriptors are
# validated first; the work is delegated to dockman_to_tar.
cv_podman_tar () {
    dockman_in_validate podman_ "$1"
    tar_out_validate "$2"
    dockman_to_tar podman_ "$1" "$2"
}

# Convert SquashFS archive $1 to ch-image storage image $2 by unpacking to a
# temporary directory under $tmpdir and importing that directory.
cv_squash_chimage () {
    squash_in_validate "$1"
    chimage_out_validate "$2"
    # Intermediate unpacked image; removed once the import is done.
    unsquash_dir=${tmpdir}/weirdal
    cv_squash_dir "$1" "$unsquash_dir"
    cv_dir_chimage "$unsquash_dir" "$2"
    quiet rm -Rf --one-file-system "$unsquash_dir"
}

# Unpack SquashFS archive $1 into image directory $2, then make sure $2 has
# everything an image directory needs (dir_fixup).
cv_squash_dir () {
    squash_in_validate "$1"
    dir_out_validate "$2"
    # Notes about unsquashfs(1):
    #
    # 1. It has no exclude filter, only include, so if the archive includes
    #    bad files like devices, this will fail. I don't know to what degree
    #    this will be a problem.
    #
    # 2. It has no option analogous to tar(1)’s -p, so we have to rely on the
    #    umask to get correct permissions. (Weirdly, it seems to respect umask
    #    for files but not directories.)
    #
    # Save the caller's umask so it can be restored after unpacking.
    umask_=$(umask)
    umask 0000
    # To support conversion with an empty dir as output (#1612), we add the “-f”
    # option to our unsquashfs(1) call below. Without “-f”, if you try to use
    # unsquashfs(1) with an empty dir as the target, you get the error “failed
    # to make directory path/to/dir, because File exists”. The documentation for
    # “-f” says “if file already exists then overwrite”, but I’ve concluded that
    # the option actually doesn’t “overwrite” directories through the following
    # test:
    #
    # 1. Create an empty directory, let’s call it “parent”.
    #
    # 2. Create another empty directory (“child”) that is a subdirectory of
    #    “parent”.
    #
    # 3. Change the owner and group of “parent” to root, ensure that the owner
    #    and group of “child” are a user (rather than root).
    #
    # 4. Check that both “parent” and “child” have the following permissions:
    #    “drwxr-xr-x”.
    #
    # 5. As the owner of “child”, try to remove “child” and confirm that linux
    #    won’t allow it.
    #
    # 6. Use unsquashfs(1) with the “-f” option to unpack an archive to “child”
    #    and confirm that it’s successful.
    #
    # (https://unix.stackexchange.com/a/583819)
    quiet unsquashfs -f -d "$2" -user-xattrs "$1"
    # Restore the umask saved above.
    umask "$umask_"
    dir_fixup "$2"
}

# Convert SquashFS archive $1 to Docker image $2. Both descriptors are
# validated first; the work is delegated to squash_to_dockman.
cv_squash_docker () {
    squash_in_validate "$1"
    docker_out_validate "$2"
    squash_to_dockman docker_ "$1" "$2"
}

# Convert SquashFS archive $1 to Podman image $2. Both descriptors are
# validated first; the work is delegated to squash_to_dockman.
cv_squash_podman () {
    squash_in_validate "$1"
    podman_out_validate "$2"
    squash_to_dockman podman_ "$1" "$2"
}

# Convert SquashFS archive $1 to gzipped tarball $2 by unpacking to a
# temporary directory under $tmpdir and packing that directory.
cv_squash_tar () {
    squash_in_validate "$1"
    tar_out_validate "$2"
    # Intermediate unpacked image; removed once the tarball is written.
    unsquash_dir=${tmpdir}/weirdal
    cv_squash_dir "$1" "$unsquash_dir"
    cv_dir_tar "$unsquash_dir" "$2"
    quiet rm -Rf --one-file-system "$unsquash_dir"
}

# Convert tarball $1 to ch-image storage image $2 using “ch-image import”.
cv_tar_chimage () {
    tar_in_validate "$1"
    chimage_out_validate "$2"
    INFO 'importing ...'
    quiet ch-image import "$1" "$2"  # FIXME: no progress meter
}

# Unpack tarball $1 into image directory $2. Works for both tarbombs and
# tarballs with a single root directory; in the latter case the root
# component is stripped so that the image contents land directly in $2.
cv_tar_dir () {
    tar_in_validate "$1"
    dir_out_validate "$2"
    INFO 'analyzing ...'
    # Empty if the tarball is a tarbomb, otherwise its single root directory.
    root=$(tar_root "$1")
    INFO 'unpacking ...'
    dir_make "$2"
    # Unpack the tarball. There's a lot going on here.
    #
    #   1. Use a pipe b/c PV ignores arguments if it’s cat rather than PV.
    #
    #   2. Use --strip-components to turn non-tarbombs into tarbombs so we can
    #      just provide our own root directory and not need to clean up later.
    #
    #   3. Use --xform to strip leading “./” so --strip-components gets the
    #      right number. (Leading “/” is always stripped and does not count as
    #      a component.) The trailing “x” gets extended regular expressions,
    #      like “sed -E”.
    #
    #   4. Use --exclude to ignore the contents of “/dev”, because
    #      unprivileged users can't make device files and we overmount that
    #      directory anyway. The order appears to be: --exclude, --xform,
    #      --strip-components, so we have to specify it thrice to account for
    #      leading “/” and “./”.
    if [ -z "$root" ]; then
        strip_ct=0  # tarbomb
        ex1='dev/*'
    else
        strip_ct=1  # not tarbomb
        # Escape tar glob wildcards; printf avoids echo’s trailing newline.
        root_escaped=$(printf "%s" "$root" | sed -E 's|([[*?])|\\\1|g')
        ex1="${root_escaped}/dev/*"
    fi
    # The same exclusion with a leading “/” and a leading “./”.
    ex2="/${ex1}"
    ex3="./${ex1}"
    VERBOSE "exclude patterns: ${ex1} ${ex2} ${ex3}"
    # $tar_xattr_args is deliberately unquoted: it holds zero or more options.
    #shellcheck disable=SC2094,SC2086
     pv_ -s "$(stat -c%s "$1")" < "$1" \
    | quiet tar x"$(tar_decompress_arg "$1")" -pC "$2" -f - \
          --xform 's|^\./||x' --strip-components=$strip_ct \
          --anchored --no-wildcards-match-slash $tar_xattr_args \
          --exclude="$ex1" --exclude="$ex2" --exclude="$ex3"
    dir_fixup "$2"
}

# Convert tarball $1 to Docker image $2. Both descriptors are validated
# first; the work is delegated to tar_to_dockman.
cv_tar_docker () {
    tar_in_validate "$1"
    docker_out_validate "$2"
    tar_to_dockman docker_ "$1" "$2"
}

# Convert tarball $1 to Podman image $2. Both descriptors are validated
# first; the work is delegated to tar_to_dockman.
cv_tar_podman () {
    tar_in_validate "$1"
    podman_out_validate "$2"
    tar_to_dockman podman_ "$1" "$2"
}

# Convert tarball $1 to SquashFS archive $2 by unpacking to a temporary
# directory under $tmpdir and packing that directory.
cv_tar_squash () {
    tar_in_validate "$1"
    squash_out_validate "$2"
    # Intermediate unpacked image; removed once the archive is written.
    tar_dir=${tmpdir}/weirdal
    cv_tar_dir "$1" "$tar_dir"
    cv_dir_squash "$tar_dir" "$2"
    quiet rm -Rf --one-file-system "$tar_dir"
}

## Dockman functions ##

# Use for conversions involving docker or podman. Similarities between the
# command lines of docker and podman allow us to write generalized functions
# that can be used for both (hence “dockman”). When calling a “dockman”
# function, the image format is specified by the first argument (“docker_” or
# “podman_”).

# Convert ch-image storage image $2 to image $3 of builder $1 (“docker_” or
# “podman_”) by way of a temporary gzipped tarball under $tmpdir.
chimage_to_dockman () {
    chimage_tar=${tmpdir}/weirdal.tar.gz
    cv_chimage_tar "$2" "$chimage_tar"  # FIXME: needlessly compresses?
    tar_to_dockman "$1" "$chimage_tar" "$3"
    quiet rm "$chimage_tar"
}

# Convert image directory $2 to image $3 of builder $1 (“docker_” or
# “podman_”) by way of a temporary gzipped tarball under $tmpdir.
dir_to_dockman () {
    dirtar=${tmpdir}/weirdal.tar.gz
    # One could also use “docker build” with “FROM scratch” and “COPY”,
    # apparently saving a tar step. However, this will in fact tar the source
    # directory anyway to send it to the Docker daemon.
    cv_dir_tar "$2" "$dirtar"  # FIXME: needlessly compresses
    tar_to_dockman "$1" "$dirtar" "$3"
    quiet rm "$dirtar"
}

# Print the human-readable name of builder $1: “Docker” for “docker_” and
# “Podman” for “podman_”. Any other value is a programming error.
dm_fmt_name () {
    if [ "$1" = docker_ ]; then
        echo Docker
    elif [ "$1" = podman_ ]; then
        echo Podman
    else
        FATAL "unreachable code reached"
    fi
}

# Exit with error unless builder $1 (“docker_” or “podman_”) reports an image
# ID for image $2, i.e. “$1 image ls -q $2” prints something.
dockman_in_validate () {
    digest=$("$1" image ls -q "$2")
    [ -n "$digest" ] || FATAL "source not found in $(dm_fmt_name "$1") storage: ${2}"
}

# Convert image $2 of builder $1 (“docker_” or “podman_”) to ch-image storage
# image $3 by way of a temporary gzipped tarball under $tmpdir.
dockman_to_chimage () {
   dockman_out=${tmpdir}/weirdal.tar.gz
   # $1 ends in an underscore, so this calls cv_docker_tar or cv_podman_tar.
   "cv_${1}tar" "$2" "$dockman_out"  # FIXME: needlessly compresses
   cv_tar_chimage "$dockman_out" "$3"
   quiet rm "$dockman_out"
}

# Convert image $2 of builder $1 (“docker_” or “podman_”) to image directory
# $3 by way of a temporary gzipped tarball under $tmpdir.
dockman_to_dir () {
    dockman_out=${tmpdir}/weirdal.tar.gz
    "dockman_to_tar" "$1" "$2" "$dockman_out" # FIXME: needlessly compresses
    cv_tar_dir "$dockman_out" "$3"
    quiet rm "$dockman_out"
}

# Convert image $2 of builder $1 (“docker_” or “podman_”) to SquashFS archive
# $3 by way of a temporary image directory under $tmpdir.
dockman_to_squash () {
    dockman_dir=${tmpdir}/weirdal
    dockman_to_dir "$1" "$2" "$dockman_dir"  # FIXME: needlessly compresses
    cv_dir_squash "$dockman_dir" "$3"
    quiet rm -Rf --one-file-system "$dockman_dir"
}

# Export image $2 of builder $1 (“docker_” or “podman_”) to gzipped tarball
# $3. The image's environment variables are appended to the archive as member
# “ch/environment”. Scratch files live under $tmpdir and are removed at the
# end.
dockman_to_tar () {
    tmptar=${tmpdir}/weirdal.tar
    tmpenv=${tmpdir}/weirdal.env
    INFO 'exporting ...'
    # Exporting works on containers, not images, so create a throwaway
    # container from the image and remove it again after the export.
    cid=$("$1" create --read-only "$2" /bin/true)  # cmd needed but not run
    # Image size as reported by the builder; given to pv_ via -s.
    size=$("$1" image inspect "$2" --format='{{.Size}}')
    quiet "$1" export "$cid" | pv_ -s "$size" > "$tmptar"
    "$1" rm "$cid" > /dev/null
    INFO 'adding environment ...'
    # One environment variable per line.
    "$1" inspect "$2" \
            --format='{{range .Config.Env}}{{println .}}{{end}}' > "$tmpenv"
    # The tar flavor Docker gives us does not support UIDs or GIDs greater
    # than 2**21, so use 0/0 rather than what’s on the filesystem. See #1573.
    # The --xform renames the scratch file to “ch/environment” in the archive.
    # shellcheck disable=SC2086
    quiet tar rf "$tmptar" -b1 -P --owner=0 --group=0 $tar_xattr_args \
        --xform="s|${tmpenv}|ch/environment|" "$tmpenv"
    INFO 'compressing ...'
    pv_ < "$tmptar" | gzip_ -6 > "$3"
    quiet rm "$tmptar"
    quiet rm "$tmpenv"
}

# Convert SquashFS archive $2 to image $3 of builder $1 (“docker_” or
# “podman_”) by way of a temporary gzipped tarball under $tmpdir.
squash_to_dockman () {
    unsquash_tar=${tmpdir}/weirdal.tar.gz
    cv_squash_tar "$2" "$unsquash_tar"
    dockman=$(echo "$1" | tr -d "_") # remove trailing underscore
    # Calls cv_tar_docker or cv_tar_podman.
    "cv_tar_$dockman" "$unsquash_tar" "$3"
    quiet rm "$unsquash_tar"
}

# Import tarball $2 as image $3 of builder $1 (“docker_” or “podman_”). The
# tarball is first imported under a temporary image name, then the final image
# is built on top of that, and finally the temporary image is removed.
tar_to_dockman () {
    INFO "importing ..."
    # “mktemp -u” only generates a name; the result is lower-cased before
    # being used as the temporary image name.
    tmpimg=$(mktemp -u tmpimg.XXXXXX | tr '[:upper:]' '[:lower:]')
    quiet "$1" import "$2" "$tmpimg"    # FIXME: no progress meter
    # Podman imports our tarballs with rw------- permissions on “/ch” (i.e.,
    # no execute), which causes all kinds of breakage. Work around that.
    quiet "$1" build -t "$3" - <<EOF
FROM $tmpimg
RUN chmod u+rwx /ch || true
EOF
    quiet "$1" rmi "$tmpimg"
}

## input/output validation functions #########################################

# Each of these checks whether $1 can be used as input/output descriptor for
# that format, and also whether it already exists if --no-clobber. Exit with
# error on validation failure.

# Exit with error unless image $1 exists as a directory in ch-image storage.
# Side effect: sets the global $img to the image's storage path.
chimage_in_validate () {
    img=$(chimage_path "$1")
    [ -d "$img" ] || FATAL "source image not found in ch-image storage: $1"
}

# Exit with error if image $1 already exists in ch-image storage and
# --no-clobber was given. Also warn if --xattrs was requested, which this
# output format does not support. Side effect: sets the global $img.
chimage_out_validate () {
    img=$(chimage_path "$1")
    if [ -d "$img" ] && [ -n "$no_clobber" ]; then
        FATAL "exists in ch-image storage, not deleting per --no-clobber: ${1}"
    fi
    if [ -n "$xattrs" ]; then
        # The “--” is passed to WARNING ahead of a message that itself starts
        # with “--” (presumably to end WARNING's option parsing; see base.sh).
        WARNING -- "--xattrs unsupported for out format \"ch-image\""
    fi
}

# Input directory check: exit with error unless $1 is an existing directory.
dir_in_validate () {
    if [ ! -d "$1" ]; then
        FATAL "not a directory: ${1}"
    fi
}

# Validate that $1 can be used as an output directory. Its parent must exist.
# If $1 itself exists, it must be either an empty directory or something that
# looks like an image, in which case it is deleted here unless --no-clobber.
dir_out_validate () {
    parent_validate "$1"
    # $1 must not exist, unless it looks like an image, in which case remove
    # it (or error if --no-clobber).
    if [ -e "$1" ]; then
        [ -d "$1" ] || FATAL "exists but not a directory: ${1}"
        # “Looks like an image” means it has bin, dev, and usr subdirectories.
        if [ -d "${1}/bin" ] && [ -d "${1}/dev" ] && [ -d "${1}/usr" ]; then
            if [ -n "$no_clobber" ]; then
                FATAL "exists, not deleting per --no-clobber: ${1}"
            else
                INFO "deleting existing image: ${1}"
                quiet rm -Rf --one-file-system "$1"
            fi
        # find(1) prints $1 only if it is an empty directory.
        elif [ -n "$(find "$1" -maxdepth 0 -type d -empty)" ]; then
            INFO "using empty directory: ${1}"
        else
            FATAL "exists but does not appear to be an image and is not empty: ${1}"
        fi
    fi
}

# Exit with error if image $1 is already present in Docker storage and
# --no-clobber was given. Side effect: sets the global $digest.
docker_out_validate () {
    # When writing the out_validate functions, we had to address a podman bug
    # that caused conversion to fail. Our workaround (see the
    # “podman_out_validate” below) is not generalizable to docker, which is
    # why we've broken from the “dockman” convention here.
    digest=$(docker_ image ls -q "$1")
    if [ -z "$digest" ] || [ -z "$no_clobber" ]; then
        return 0
    fi
    FATAL "exists in Docker storage, not deleting per --no-clobber: ${1}"
}

# Exit with error if image $1 is already listed by Podman under “localhost/”
# and --no-clobber was given.
podman_out_validate () {
    # Podman’s own image search is a substring query, so e.g. “foo” will also
    # match “foobar”. Decades-old UNIX tools to the rescue. Thanks Red Hat!!
    if ! ( podman_ images | grep -Eq "^localhost/${1}\s" ); then
        return 0
    fi
    if [ -z "$no_clobber" ]; then
        return 0
    fi
    FATAL "exists in Podman storage, not deleting per --no-clobber: ${1}"
}

# Exit with error unless input SquashFS archive $1 exists.
squash_in_validate () {
    if [ ! -e "$1" ]; then
        FATAL "not found: ${1}"
    fi
}

# Validate that $1 can be used as an output SquashFS archive: its parent
# directory must exist, and $1 itself must not exist if --no-clobber.
squash_out_validate () {
    parent_validate "$1"
    path_noclobber "$1"
}

# Exit with error unless input tarball $1 exists.
tar_in_validate () {
    if [ ! -e "$1" ]; then
        FATAL "not found: ${1}"
    fi
}

# Validate that $1 can be used as an output tarball: its name must end in
# “.tar.gz” or “.tgz”, its parent directory must exist, and $1 itself must
# not exist if --no-clobber.
tar_out_validate () {
    case $1 in
        *.tar.gz|*.tgz)
            ;;
        *)
            FATAL "only gzipped tar output (.tar.gz or .tgz) supported"
            ;;
    esac
    parent_validate "$1"
    path_noclobber "$1"
}


## supporting functions ######################################################

# Print the path of image $1 within ch-image storage. The storage root is
# $cli_storage if that is non-empty, otherwise whatever ch-image itself
# reports as its storage path.
chimage_path () {
    if [ -n "$cli_storage" ]; then
        echo "$cli_storage/img/$(tag_to_path "$1")"
    else
        echo "$(ch-image gestalt storage-path)/img/$(tag_to_path "$1")"
    fi
}

# Return basename of $2 (format $1) with no extension and filesystem-invalid
# characters removed, i.e., suitable for a new extension to be appended. Only
# extensions valid for the format $1 are considered.
#
# Exits with error if $1 is not a known format. Note that this assigns the
# global variables $fmt and $dsc.
desc_base () {
    fmt=$1
    dsc=$2
    case $fmt in
        dir)
            # Directories have no extension to strip.
            basename "$dsc"
            ;;
        ch-image|docker|podman)
            # Builder storage: $dsc is an image reference, not a path.
            tag_to_path "$dsc"
            ;;
        squash)
            basename "$dsc" | sed -E 's/\.(sqfs|squash|squashfs|squishy)$//'
            ;;
        tar)
            # Strips “.t?z”, “.tar”, “.tar.?”, and “.tar.??”.
            basename "$dsc" | sed -E 's/\.(t.z|tar(\.(.|..))?)$//'
            ;;
        *)
            FATAL "invalid format: $fmt"
            ;;
    esac
}

# Ensure $1 has everything needed to be an image directory: directories that
# lack any of the owner's read/write/execute bits are given owner write
# permission, and missing mount points are created.
dir_fixup () {
    DEBUG "fixing up: $1"
    # Make all directories writeable so we can delete later (hello, Red Hat).
    quiet find "$1" -type d -a ! -perm -u+rwx -exec chmod u+w {} +
    # Ensure mount points are present.
    mount_points_ensure "$1"
}

# Create directory $1 unless something already exists at that path. Callers
# have already run “dir_out_validate”, so anything that does exist at $1 is
# known to be a directory.
dir_make () {
    [ -e "$1" ] || quiet mkdir "$1"
}

# Print the validated format for descriptor $2. If $1 is non-empty it is the
# requested format and is only checked; if $1 is empty the format is inferred
# from $2. Exits with error if the format is unknown, cannot be inferred, or
# needs a builder that is not installed. Assigns the globals $fmt and $dsc.
fmt_validate () {
    fmt=$1
    dsc=$2
    # Step 1: inference, only when no format was given. First match wins.
    if [ -z "$fmt" ]; then
        case $dsc in
            *.sqfs|*.squash|*.squashfs|*.squishy) fmt=squash ;;
            *.tar|*.t?z|*.tar.?|*.tar.??)         fmt=tar    ;;
            /*|./*)                               fmt=dir    ;;
            *)
                # Not a path we recognize, so treat it as an image reference
                # in the storage of the first builder we have.
                if [ -n "$have_ch_image" ]; then
                    fmt=ch-image
                elif [ -n "$have_podman" ]; then
                    fmt=podman
                elif [ -n "$have_docker" ]; then
                    fmt=docker
                else
                    FATAL "descriptor looks like builder storage but no builder found: ${dsc}"
                fi
                ;;
        esac
    fi
    # Step 2: validation. Builder formats require the builder to be present.
    case $fmt in
        dir|squash|tar)
            ;;
        ch-image)
            [ -n "$have_ch_image" ] || FATAL "format ch-image invalid: ch-image not found"
            ;;
        docker)
            [ -n "$have_docker" ] || FATAL "format docker invalid: docker not found"
            ;;
        podman)
            [ -n "$have_podman" ] || FATAL "format podman invalid: podman not found"
            ;;
        *)
            FATAL "invalid format: ${fmt}"
            ;;
    esac
    echo "$fmt"
}

# Ensure mount points needed by ch-run exist in directory $1. Do nothing if
# something already exists, without dereferencing, in case it's a symlink,
# which will work for bind-mount later but won't resolve correctly now outside
# the container (e.g. linuxcontainers.org images; issue #1015).
#
# If $2 is non-empty, append missing mount points to a list of mksquashfs(1)
# “pseudo files” to that file instead of modifying $1. While pseudo files
# don't conflict with actual files, they do generate a warning.
#
# An alternative approach is to create the mount points in a temporary
# directory, then append that to the SquashFS archive. However, mksquashfs(1)
# does not merge the new files. If an existing file or directory is given in
# the appended directory, both go into the archive, with the second renamed
# (to “foo_1”). This makes it impossible to add mount points to a directory
# that already exists; e.g., if /etc exists, /etc/resolv.conf will end up at
# /etc_1/resolv.conf.
#
# Note that the loop variable $i is global.
#
# WARNING: Keep in sync with Image.unpack_init().
mount_points_ensure () {
    # directories
    for i in bin dev etc mnt proc usr \
             mnt/0 mnt/1 mnt/2 mnt/3 mnt/4 mnt/5 mnt/6 mnt/7 mnt/8 mnt/9; do
        if ! exist_p "${1}/${i}"; then
            if [ -n "$2" ]; then
                # Pseudo file definition: directory, mode 755, root:root.
                quiet echo "${i} d 755 root root" >> "$2"
            else
                quiet mkdir "${1}/${i}"
            fi
        fi
    done
    # files
    for i in etc/hosts etc/resolv.conf; do
        if ! exist_p "${1}/${i}"; then
            if [ -n "$2" ]; then
                # Pseudo file definition: regular file, mode 644, root:root,
                # with “true” as the trailing command field.
                quiet echo "${i} f 644 root root true" >> "$2"
            else
                quiet touch "${1}/${i}"
            fi
        fi
    done
}

# Exit with error unless the parent (enclosing) directory of $1 exists and is
# a directory. Assigns the global $parent.
parent_validate () {
    parent=$(dirname "$1")
    # The message must go through FATAL; a bare string here would be run as a
    # command, giving “command not found” rather than a proper error.
    [ -d "$parent" ] || FATAL "not a directory: ${parent}"
}

# Refuse to overwrite: exit with error when $1 already exists and
# --no-clobber was given. Otherwise do nothing.
path_noclobber () {
    [ -e "$1" ] || return 0
    [ -n "$no_clobber" ] || return 0
    FATAL "exists, not deleting per --no-clobber: ${1}"
}

# Tar $1 and emit the result on stdout, excluding build cache metadata.
# Produce a tarbomb because Docker requires tarbombs.
#
# The archive is created from inside $1 (member names start with “./”); the
# cd happens in a subshell so the caller's working directory is unaffected.
# Output passes through pv_.
tar_ () {
    # $tar_xattr_args is deliberately unquoted: it holds zero or more options.
    # shellcheck disable=SC2086
    ( cd "$1" && tar cf - $tar_xattr_args \
                          --exclude=./ch/git \
                          --exclude=./ch/git.pickle . ) | pv_
}

# Print the appropriate tar(1) decompression argument for file named $1, which
# may be the empty string, because GNU tar is unable to infer it if input is a
# pipe [1], and we want to keep pv(1).
#
# Recognized: uncompressed (.tar), gzip (.tar.gz, .tgz), xz (.tar.xz, .txz),
# and bzip2 (.tar.bz2, .tbz, .tbz2). The short forms matter because names
# matching “*.t?z” are inferred to be tarballs. Any other extension is a fatal
# error.
#
# [1]: https://www.gnu.org/software/tar/manual/tar.html#gzip
tar_decompress_arg () {
    case $1 in
        *.tar)
            # Uncompressed: print only a newline, i.e. no argument.
            echo
            ;;
        *.tar.gz|*.tgz)
            echo z
            ;;
        *.tar.xz|*.txz)
            echo J
            ;;
        *.tar.bz2|*.tbz|*.tbz2)
            echo j
            ;;
        *)
            FATAL "unknown extension: ${1}"
            ;;
    esac
}

# Print the name of the root directory of tarball $1 on stdout, if there is
# one. If not, i.e. $1 is a tarbomb, return the empty string.
#
# The listing is read through pv_ (a pipe), so the decompression argument
# comes from tar_decompress_arg rather than being inferred by tar.
#
# This is rather messy because:
#
#   1. Archive members can start with “/” (slash) or “./” (dot, slash), both
#      of which are ignored on unpacking. For example, the root directory as
#      listed in the tarball might be “foo”, “/foo”, or “./foo”; in all three
#      cases this function prints “foo”.
#
#   2. Tarballs have no index, so listing all members requires reading and
#      decompressing the entire archive (and unpacking is a second full read).
#      I have not found a way to detect a tarbomb without listing all members;
#      doing so is issue #1325.
#
#      I have not tested decompressing once and then reading the decompressed
#      version twice. Some quick testing suggests that we spend almost all the
#      read time in gzip, but this approach adds time to write the
#      uncompressed tarball in addition to space to store it, so it's not an
#      appealing approach to me.
#
#      Previously, we listed only the first N members, but this breaks on
#      Spack images if /spack is first in the archive because that directory
#      can contain tens of thousands of files (maybe more).
#
#   3. GNU tar lists members newline-separated. This still works for member
#      names containing newline, because it's escaped as “\n”. Some other
#      characters are escaped too, e.g. tab is “\t”. I am assuming this case
#      is unlikely for container image tarballs, so this function has not been
#      tested with such members.
#
# See also: https://unix.stackexchange.com/a/242712
tar_root () {
    # The three commands in this sed script are: (1) remove leading “/” or
    # “./” if present; (2) delete from the first slash to the end of the line
    # inclusive, i.e. everything except the first component; (3) delete blank
    # lines, because the first component often appears alone. We use sed
    # because --xform does not apply to listing.
    #
    # The result, $list, holds the first path component of every member, one
    # per line.
    #shellcheck disable=SC2094
    list=$(  pv_ -s "$(stat -c%s "$1")" < "$1" \
           | tar t"$(tar_decompress_arg "$1")" -f - \
           | sed -E 's|^\.?/||; s|/.*$||; /^$/d')
    # Get the first path component of the first file in the tarball.
    root1=$(echo "$list" | head -n 1)
    # Find members whose first component does not match the first member’s.
    # grep -Fxv selects lines that are not exactly $root1; if any exist, there
    # is more than one top-level name, i.e. the tarball is a tarbomb.
    if echo "$list" | grep -Fxvq "$root1"; then
        VERBOSE 'tarbomb: yes'
        echo ''
    else
        VERBOSE 'tarbomb: no'
        echo "$root1"
    fi
}

# Replace $tmpdir with a freshly created, uniquely and unpredictably named
# subdirectory of the base temp directory. The base is the existing value of
# $tmpdir (i.e. --tmp) if non-empty, else $TMPDIR if non-empty, else /var/tmp,
# and it must be an absolute path.
tmpdir_setup () {
    tmpdir=${tmpdir:-${TMPDIR:-/var/tmp}}
    # Stripping a leading slash changes nothing only if there wasn't one.
    if [ "${tmpdir#/}" = "$tmpdir" ]; then
        FATAL "temp dir must be absolute: ${tmpdir}"
    fi
    tmpdir=$(mktemp -d --tmpdir="$tmpdir" ch-convert.XXXXXX)
}

# “--xattrs” and “--no-xattrs” are mutually exclusive: exit with error if both
# have been seen on the command line.
xattr_opt_err () {
    [ -z "$xattrs" ] || [ -z "$no_xattrs" ] \
        || FATAL "\"--xattrs\" incompatible with \"--no-xattrs\""
}


## main ######################################################################

# Parse options. parse_basic_arg is not defined in this file (presumably it
# comes from base.sh and handles options common to all scripts -- TODO
# confirm); anything it does not accept is handled by the case below. The
# first argument that is not a recognized option ends option parsing.
while true; do
    if ! parse_basic_arg "$1"; then
        case $1 in
            -i|--in-fmt)
                # Value is the next argument.
                shift
                in_fmt=$1
                ;;
            -i=*|--in-fmt=*)
                # Value follows the first “=”.
                in_fmt=${1#*=}
                ;;
            -n|--dry-run)
                dry_run=yes
                ;;
            --no-clobber)
                no_clobber=yes
                ;;
            --no-xattrs)
                no_xattrs=yes
                xattr_opt_err
                ;;
            -o|--out-fmt)
                shift
                out_fmt=$1
                ;;
            -o=*|--out-fmt=*)
                out_fmt=${1#*=}
                ;;
            -s|--storage)
                shift
                cli_storage=$1
                ;;
            --tmp)
                shift
                tmpdir=$1
                ;;
            --xattrs)
                xattrs=yes
                xattr_opt_err
                ;;
            *)
                break
                ;;
        esac
    fi
    shift
done
# Exactly two positional arguments must remain: IN and OUT. “usage” is invoked
# as a command here; it is not defined as a function in this file (presumably
# base.sh provides one that prints $usage and exits -- TODO confirm).
if [ "$#" -ne 2 ]; then
    usage
fi
# Preserve xattrs if --xattrs was given, or if $CH_XATTRS is set and
# --no-xattrs was not given.
# This bizarre bit of syntax comes from https://unix.stackexchange.com/a/28782
if [ -n "$xattrs" ] || { [ -n "$CH_XATTRS" ] && [ -z "$no_xattrs" ]; }; then
    echo "preserving xattrs..."
    tar_xattr_args='--xattrs-include=user.* --xattrs-include=system.*'
    squash_xattr_arg=-xattrs
else
    echo "discarding xattrs..."
    tar_xattr_args=
    squash_xattr_arg=
fi
in_desc=$1
out_desc=$2
VERBOSE "verbose level: ${log_level}"

# Detect which builders are installed; fmt_validate reads these flags.
if command -v ch-image > /dev/null 2>&1; then
    have_ch_image=yes
    VERBOSE 'ch-image: found'
else
    VERBOSE 'ch-image: not found'
fi
if command -v docker > /dev/null 2>&1; then
    have_docker=yes
    VERBOSE 'docker: found'
else
    VERBOSE 'docker: not found'
fi
if command -v podman > /dev/null 2>&1; then
    have_podman=yes
    VERBOSE 'podman: found'
else
    VERBOSE 'podman: not found'
fi

# Check or infer the input and output formats, then create the scratch
# directory used for intermediate files.
in_fmt=$(fmt_validate "$in_fmt" "$in_desc")
out_fmt=$(fmt_validate "$out_fmt" "$out_desc")
tmpdir_setup

VERBOSE "temp dir: ${tmpdir}"
VERBOSE "noclobber: ${no_clobber:-will clobber}"
INFO 'input:   %-8s  %s' "$in_fmt"  "$in_desc"
INFO 'output:  %-8s  %s' "$out_fmt" "$out_desc"

if [ "$in_fmt" = "$out_fmt" ]; then
    FATAL 'input and output formats must be different'
fi

if [ -z "$dry_run" ]; then
    # Dispatch to conversion function. POSIX sh does not support hyphen in
    # function names, so remove it.
    "cv_$(echo "$in_fmt" | tr -d '-')_$(echo "$out_fmt" | tr -d '-')" \
        "$in_desc" "$out_desc"
fi

# rmdir(1) only removes empty directories, so this expects the conversion
# functions to have cleaned up their own intermediate files.
quiet rmdir "$tmpdir"

INFO 'done'
