#!/usr/bin/env bash
# Load the shared helper library from ../lib relative to this script's own
# directory (${0%/*}); give up right away if it cannot be sourced.
. "${0%/*}"/../lib/common.bashlib || exit 1 #C#
# NOTE(review): init, depend and include are provided by the library sourced
# above and are not visible here -- presumably they prepare the runtime, check
# that the listed external commands are available, and load the tmpdir helpers
# (tmpinit/tmppath are called further down). Confirm against common.bashlib.
init
# NOTE(review): sed(1) is invoked later in this script but is not listed
# here -- confirm whether it should be.
depend cat cp head file mktemp readlink rm stat
include tmpdir
# Feed the help text to usage on stdin. The heredoc delimiter is unquoted, so
# ${0##*/} and ${XDG_CONFIG_HOME:-...} are expanded here, while the
# backslash-escaped \$EDITOR and backticks are passed through literally;
# <<- strips the leading tabs of every line.
# NOTE(review): usage is defined by the sourced library (presumably it keeps
# the text for -h/--help), and @confdir@ / @package@ look like placeholders
# replaced at build time -- confirm both.
usage <<-EOU
	Usage: ${0##*/} [option]... [patch]...

	Strip patches of typically useless cruft such as "diff --git"
	lines and timestamps to reduce size and improve readability.

	Patches can be passed either as arguments or through stdin,
	and either modified in-place (i.e. sed(1)'s -i) or to stdout.

	Options:
	  -e, --edit         Open patch in \$EDITOR after scrubbing and before QA
	                     (e.g. to add links before unnecessary warnings)

	  -g, --git          Convert \`leading-1.0/file\` to git-style \`a/file\`
	                     rather than warn (do not use if not a -p1 patch)
	  -1, --p0p1         Add one directory level to every files (-p0 -> -p1)

	  -q, --quiet        Don't nag about possible QA issues
	  -n, --dry-run      Don't scrub and only nag about QA instead

	  -s, --no-sanity    Disable \`file\` check for if being misdetected

	  -i, --in-place     Force \`sed -i\`-like behavior (no autodetect)
	  -o, --stdout       Always output the modified patch to stdout (no autodetect)

	  -c, --no-color     Disable use of colors

	      --confdir=PATH Configuration dir to use instead of defaults
	                     (@confdir@ + ${XDG_CONFIG_HOME:-~/.config}/@package@)
	      --dumpconfig   Display config and exit (> ${0##*/}.conf)

	  -h, --help         Display usage information and exit
	      --version      Display version information and exit
EOU

# NOTE(review): setmsg is a library helper; given the remark below it
# presumably points msg/msgf output at file descriptor 2 -- confirm.
setmsg 2 # stdout may be used to write the patch

# NOTE(review): optauto comes from the sourced library. Judging by how its
# results are used in this script, it parses "${@}" against the spec below,
# fills the O[...] option map read everywhere else, and leaves the remaining
# non-option arguments in the array named by its first argument (args).
# Each spec line appears to read short|long=type:default, with a leading '!'
# declaring the negated --no-<name> form (compare -s/--no-sanity and
# -c/--no-color in the usage text) -- confirm against the library.
optauto args "${@}" <<-EOO
	e|edit=bool:false
	g|git=bool:false
	1|p0p1=bool:false
	q|quiet=bool:false
	n|dry-run=bool:false
	s|!sanity=bool:true
	i|in-place=bool:false
	o|stdout=bool:false
	c|!color=bool:true
EOO
# replace the positional parameters with the contents of args, then drop args
set -- "${args[@]}"; unset args

# the two output modes exclude each other
if ${O[in-place]} && ${O[stdout]}; then
	die "can't in-place and stdout at same time"
fi
# -1/--p0p1 already inserts the a/ and b/ prefixes, making -g/--git redundant
if ${O[git]} && ${O[p0p1]}; then
	O[git]=false
fi
# editing needs an editor to launch
if ${O[edit]} && [[ -z ${EDITOR} ]]; then
	die "-e/--edit given but \$EDITOR is not set"
fi

# Basic sanity gate for supplied files (a safety net before any in-place
# write): succeed only when file(1) describes ${1} as a unified diff.
#   ${1}: path of the file to examine
# Always succeeds when the check is switched off (O[sanity] is false).
is_sane() {
	if ! ${O[sanity]}; then
		return 0
	fi

	# only the first two lines of file(1)'s description are considered
	local described
	described=$(LC_ALL=C file -bkLr -- "${1}" | head -n 2)
	[[ ${described} == *'unified diff output'* ]]
}
# Gather the patch files named on the command line, consuming the positional
# parameters one by one; anything that is not a regular file, or that fails
# the is_sane check, aborts the run.
patches=()
while [[ ${#} -gt 0 ]]; do
	if [[ ! -f ${1} ]]; then
		die "'${1}' is not a valid file"
	fi
	if ! is_sane "${1}"; then
		die "'${1}' is not reported to be a unified format patch by file(1) -- if it is, can use -s/--no-sanity to bypass"
	fi
	patches+=("${1}")
	shift
done

# devmanual's sed has a -i by default, try to preserve this behavior but
# will drop if input or output is detected to be stdin/stdout
if ${O[in-place]}; then
	# explicitly requested: the standard streams are not even looked at
	stdin=
	stdout=
else
	# maybe in-place, stdout, or force-stdout

	# resolve what fd 0 and fd 1 point at; only a regular file or a pipe
	# counts as "redirected", anything else clears the variable
	stdin=$(readlink -m /proc/$$/fd/0) || die "readlink failed for stdin"
	if [[ ! -f ${stdin} && ${stdin##*/} != pipe:* ]]; then
		stdin=
	fi
	stdout=$(readlink -m /proc/$$/fd/1) || die "readlink failed for stdout"
	if [[ ! -f ${stdout} && ${stdout##*/} != pipe:* ]]; then
		stdout=
	fi

	# in-place unless a stream is redirected or -o/--stdout was given
	if [[ -n ${stdin} || -n ${stdout} ]] || ${O[stdout]}; then
		O[in-place]=false
	else
		O[in-place]=true
	fi
fi

# Work inside a temporary directory: it simplifies inspecting and editing the
# result and gives a patch arriving on stdin a file to live in.
# NOTE(review): tmpinit/tmppath are library helpers (see "include tmpdir");
# presumably tmppath stores the directory path in the variable named tmp.
tmpinit
tmppath tmp
if [[ -n ${stdin} ]]; then
	# save the incoming patch, then point stdin at the terminal
	# (presumably so that $EDITOR can still be used interactively)
	cat > "${tmp}/stdin" || die
	exec </dev/tty
	patches+=("${tmp}/stdin")
fi

if (( ${#patches[@]} == 0 )); then
	die "no patches given"
fi
# several patches only make sense when each is rewritten in-place
if (( ${#patches[@]} > 1 )) && ! ${O[in-place]}; then
	die "no support for multiple files when using stdin/stdout"
fi

# taken from vapier's clean patches howto with small changes (see notes)
#  - https://dev.gentoo.org/~vapier/clean-patches
#  - https://devmanual.gentoo.org/ebuild-writing/misc-files/patches/index.html
#
# Argument list handed to sed(1) for every patch; the opt-in expressions
# further down are appended to it.
# NOTE(review): the 0,/re/ address, \| alternation and \t used below are GNU
# sed features.
sedexps=(
	# delete metadata lines that tools such as git, cvs and diff(1) emit
	# around the actual hunks
	-e '/^index /d'
	-e '/^new file mode /d'
	-e '/^Index:/d'
	-e '/^=========/d'
	-e '/^RCS file:/d'
	-e '/^retrieving/d'
	-e '/^diff /d'
	-e '/^Files .* differ$/d'
	-e '/^Only in /d'
	-e '/^Common subdirectories/d'
	-e '/^deleted file mode [0-9]*$/d'
	# on file header lines, cut everything from the first tab onwards
	# (where diff puts its timestamps)
	-e '/^--- /s:\t.*::' # original lacks space after ---, added for safety
	-e '/^+++ /s:\t.*::'
	# discard after '---' in header (git noise, e.g. N files changed):
	# from a line that is exactly '---' up to the next '--- ' line, delete
	# everything except that '--- ' line itself
	# NOTE(review): a removed line whose content is just '--' also shows up
	# as '---' inside a hunk and would start this range -- confirm that this
	# cannot eat patch content.
	-e '/^---$/,/^--- /{/^--- /!d}'
	# standardize (https+shorten) "some" common bugzilla urls in header;
	# the 0,/^--- / address limits this to the lines up to and including the
	# first '--- ' line
	-e '0,/^--- /{
		s|http://\(bugs\.gentoo\.org\|bugzilla\.redhat\.com\|gcc\.gnu\.org\|sourceware\.org\)/|https://\1/|g
		s|\(://bugs\.gentoo\.org/\)show_bug\.cgi?id=\([0-9]*\)|\1\2|g
		s|\(://bugzilla\.redhat\.com/\)show_bug\.cgi?id=\([0-9]*\)|\1\2|g
		s|\(://gcc\.gnu\.org/\)bugzilla/show_bug\.cgi?id=\([0-9]*\)|\1PR\2|g
		s|\(://sourceware\.org/\)bugzilla/show_bug\.cgi?id=\([0-9]*\)|\1PR\2|g
	}'
)

# Rewrites that could damage a patch they were not meant for, hence only
# appended to the sed argument list when asked for.
if ${O[git]}; then
	# [^/][^/]* never matches a name starting with '/', which keeps
	# e.g. /dev/null as-is; the first path component becomes a/ or b/
	sedexps+=(-e 's:^--- [^/][^/]*/:--- a/:')
	sedexps+=(-e 's:^+++ [^/][^/]*/:+++ b/:')
fi
if ${O[p0p1]}; then
	msg "Warning: adding a directory level to every files due to -1/--p0p1"
	# prefix every non-absolute file name with a/ or b/
	sedexps+=(-e '/^--- [^/]/s:^--- :&a/:')
	sedexps+=(-e '/^+++ [^/]/s:^+++ :&b/:')
fi

# Run the QA heuristics over a patch and print one report entry per finding
# on stdout (nothing is printed when there is nothing to report).
#   ${1}: path of the patch to read
#   ${2}: size of that patch in bytes, compared against the 20kiB limit
# Reads the C[...] entries for colors and dies if ${1} cannot be read.
# The exit status is that of the final crlf test and is not meant to be used.
inspect() {
	# qa <message> <line number> <line>
	# Show the offending line (red when removed, green when added) with its
	# number, followed by the remark about it.
	qa() {
		local c
		if [[ ${3} == -* ]]; then
			c=${C[r]}
		elif [[ ${3} == +* ]]; then
			c=${C[g]}
		else
			c=${C[a]}
		fi
		printf "\n${C[y]}%03d: ${c}%s\n${C[a]}^^^${C[n]} %s\n" "${2}" "${3}" "${1}"
	}
	# qanl <message>
	# Show a remark that is not tied to any particular line.
	qanl() {
		printf "\n${C[y]}>>>${C[n]} %s\n" "${1}"
	}

	local -i j=0 removed=0
	local crlf=false
	local header=true
	local header_http=false
	local header_text=false
	# basenames from the current "--- " / "+++ " pair, kept out of global scope
	local file_a= file_b=

	local l
	# "|| [[ ${l} ]]" keeps a final line that lacks its trailing newline
	while IFS= read -r l || [[ ${l} ]]; do
		j+=1

		# everything before the first "--- " line is the free-form header
		[[ ${l} == ---\ * ]] && header=false

		if ${header}; then
			case ${l} in
				*[a-zA-Z]*) header_text=true;;&
				*http:*|*https:*) header_http=true;;
			esac
			# mbox-style "From <hash> <date>" line as left by git format-patch
			if [[ ${l} =~ ^From\ [a-z0-9]*\ .*\ ..:..:..\ .... ]]; then
				# if know the upstream it's easy to find but it's inconvenient
				qa "suggest replacing line with a http link pointing the upstream commit hash" "${j}" "${l}"
			fi
		else
			# ";;&" lets a line fall through to the remaining patterns so that
			# it can collect several remarks; ";;" stops at the first match
			case ${l} in
				---\ *)
					if [[ ${l} == ---\ /dev/null* ]]; then
						# new file: nothing to compare the "+++" name with
						file_a=
					else
						file_a=${l%%$'\t'*}
						file_a=${file_a#--- }
						if [[ ${file_a} != a/* ]]; then
							qa "does not start with a/ (-p1 and short git-style names)" "${j}" "${l}"
						fi
						file_a=${file_a##*/}
					fi
				;;
				+++\ *)
					if [[ ${l} == +++\ /dev/null* ]]; then
						qa "is patch removing files? (how about using rm in the ebuild instead?)" "${j}" "${l}"
					else
						file_b=${l%%$'\t'*}
						file_b=${file_b#+++ }
						if [[ ${file_b} != b/* ]]; then
							qa "does not start with b/ (-p1 and short git-style names)" "${j}" "${l}"
						fi
						file_b=${file_b##*/}

						if [[ ${file_b} == configure ]]; then
							qa "if this 'configure' is autotools-generated, it's preferable to patch"$'\n'"    configure.ac and use eautoreconf instead" "${j}" "${l}"
						fi

						if [[ ${file_a} && ${file_a} != "${file_b}" ]]; then
							qa "filename is different from previous line, is this a git-style rename?"$'\n'"   (renaming needs recent patch(1) and won't work with stripped 'diff --git',"$'\n'"    preferable to not rely on this if patch is intended for portage)" "${j}" "${l}"
						fi
					fi
				;;
				# this one is hard to add given need to consider #include, #ifdef, etc..
#				+\#*\|+//*) # keep to minimum, characters could mean many things
#					qa "is patch inserting comments? (preferably useless lines should be removed"$'\n'"    and explanations go in patch header, unless it's meant to be upstreamed)" ${j} "${l}"
#				;;
				+*pkg-config*)
					# check for variable too in case of Makefile "PKG_CONFIG ?= pkg-config"
					if [[ ${l} != *PKG_CONFIG* ]]; then
						qa "should use PKG_CONFIG variable over a flat pkg-config call" "${j}" "${l}"
					fi
				;;&
				+*/usr*|+*/etc*|+*/var*)
					qa "seeing a potentially top-level path, is it respecting \${EPREFIX}?" "${j}" "${l}"
				;;&
				+*/lib64*|+*/lib32*|+*/lib/*) # keep extra / given lib* matches too much
					qa "seem like a library dir, is it respecting \$(get_libdir)?" "${j}" "${l}"
				;;&
				*$'\r') crlf=true;;
			esac

			# count consecutive lines starting with '-'; any other line resets
			if [[ ${l} == -* ]]; then
				# pre-increment so the remark fires once, on the 21st line of
				# the run, i.e. as soon as "over 20" becomes true
				if (( ++removed == 21 )); then
					qa "over 20 lines are being removed, any chance to simplify? e.g. with 'if false'"$'\n'"   (disregard if meant to be upstreamed, go in SRC_URI, or would cause issues)" "${j}" "${l}"
				fi
			else
				removed=0
			fi
		fi
	done < "${1}" || die "failed reading '${1}'"

	# checked by other tools, but check the 20kiB limit as a forewarning
	if (( ${2} >= 20480 )); then
		qanl "size greater than 20kiB, reminder that large patches should use SRC_URI"$'\n'"   (or be preferably upstreamed)"
	fi

	# hard to tell if it's a good header or not, but check for lack of effort
	${header_text} || qanl "empty header, usually good to have (at least) some short description"
	${header_http} || qanl "no links in header, verify if relevant bug links or upstream refs to add"

	${crlf} && qanl "note that the patch seems to have some \\r\\n line terminators, use care"$'\n'"    so they aren't accidentally lost"
}

# Heading for the per-patch report lines printed by the loop below. A real
# if/else guarantees exactly one heading: with "a && b || c" the second
# message would also be printed whenever the first msg call failed.
if ${O[dry-run]}; then
	msg "Would scrub (dry-run):"
else
	msg "Successfully scrubbed:"
fi

# Scrub every collected patch into a private copy, report the size change,
# optionally let the user edit it, run the QA checks and finally write the
# result back (in-place) or to stdout.
hadqa=false
declare -i i oldsize newsize
for patch in "${patches[@]}"; do
	# get temporary path with same basename, always end in .patch to
	# help syntax highlight in $EDITOR and to rename the stdin file
	clean=${tmp}/${patch##*/}
	clean=${clean%.diff}
	clean=${clean%.patch}
	# handle potentially same-named patches from different directories
	if [[ -e ${clean}.patch ]]; then
		i=2
		while [[ -e ${clean}-${i}.patch ]]; do
			i+=1
		done
		clean+=-${i}
	fi
	clean+=.patch

	# scrub
	sed "${sedexps[@]}" -- "${patch}" > "${clean}" || die

	# get patch size before and after for report
	oldsize=$(stat -c %s "${patch}") || die
	(( oldsize )) || die "stat reports '${patch}' to be empty"
	newsize=$(stat -c %s "${clean}") || die
	(( newsize )) || die "stat reports '${clean}' to be empty"

	# integer math in hundredths of a percent; the e-2 suffix makes printf
	# scale the value back down for %.2f
	msgf " - %s (%.2f%% reduction)\n" "${stdin:-${patch}}" \
		$((10**4 - 10**4 * newsize / oldsize))e-2

	if ${O[edit]}; then
		# ignore exit status, may be volatile
		command -- ${EDITOR} "${clean}" >&2 #!SC2086

		# warn if no longer a unified patch, but still proceed to keep work
		is_sane "${clean}" || msg "Warning: after editing, '${clean}' is no longer seen as a unified format patch by file(1)"

		# the editor may have changed the file: refresh the size so that the
		# QA size check judges what is actually going to be written out
		newsize=$(stat -c %s "${clean}") || die
	fi

	if ! ${O[quiet]}; then
		# inspect prints its findings on stdout; an empty capture means clean
		qareport=$(inspect "${clean}" "${newsize}")
		if [[ ${qareport} ]]; then
			msg
			msg "QA: ${stdout:-${stdin:-${patch}}} has generated warnings:"
			msg "${qareport}"
			hadqa=true
		fi
	fi

	if ! ${O[dry-run]}; then
		if ${O[in-place]}; then
			cp -f -- "${clean}" "${patch}" || die
		else
			cat -- "${clean}" || die
		fi
	fi
done

# Closing advice: either a pointer to the patch guidelines when QA remarks
# were shown, or a reminder to check the header when the patch was rewritten
# without the user having looked at it in an editor.
if ! ${hadqa}; then
	if ! ${O[dry-run]} && ! ${O[edit]}; then
		msg "Please verify that header(s) have not lost anything meaningful."
	fi
else
	msg
	msg "If patch is intended for the ::gentoo tree, would be good to review."
	msg "Note that these checks are tentative and cannot consider everything."
	msg "https://devmanual.gentoo.org/ebuild-writing/misc-files/patches/index.html"
fi

# vim: ts=4
