#!/bin/bash
# Syncs Arch, ALARM or Arch32 repos based on info contained in the
# accompanying .conf files.
# License: GPLv3

# errexit: abort on the first unhandled failure.
# errtrace: let the ERR trap installed further down be inherited by
# functions, command substitutions and subshells.
set -o errexit -o errtrace

source "$(librelib messages)"
source "$(dirname "$(readlink -e "$0")")/config"

# Upstream distributions accepted as the single command line argument.
readonly -a UPSTREAMS=(archlinux archlinux32 archlinuxarm)

# Run as `V=true db-import-pkg-archlinux` to get verbose output.
# Only the literal value `true` enables it; an unset or empty V means quiet.
# The value is compared instead of being executed as a command: an empty
# ${VERBOSE} would expand to a null command, which succeeds and would
# therefore add `-v` whenever V is not set at all.
VERBOSE=${V:-false}
extra=()
if [[ ${VERBOSE} == true ]]; then
	extra+=(-v)
fi

# Scratch directory that receives the upstream databases; removed on exit.
# The path is %q-quoted because the trap string is re-parsed when it runs.
WORKDIR=$(mktemp -dt "${0##*/}.XXXXXXXXXX")
trap "rm -rf -- $(printf '%q' "${WORKDIR}")" EXIT

# usage: trap_exit <signal> <error-format> [<format-args>...]
#
# Prints a blank line, reports the message through error(), restores the
# default disposition of <signal> and then sends <signal> to this shell.
trap_exit() {
	local -r signame=$1

	printf '\n'
	error "${@:2}"
	trap - "${signame}"
	kill -s "${signame}" "$$"
}

# Signal handling, taken from makepkg.
# TERM, HUP and QUIT share one message template that receives the signal
# name; the handler string is expanded now, while ${signal} still holds it.
for signal in TERM HUP QUIT; do
	trap "trap_exit ${signal} '%s signal caught. Exiting...' ${signal}" "${signal}"
done
unset signal

# Any command failure (ERR) is reported and turned into a USR1 sent to self.
trap 'trap_exit USR1 "An unknown error has occurred. Exiting..."' ERR
# Ctrl-C gets its own wording.
trap 'trap_exit INT "Aborted by user! Exiting..."' INT

# usage: fatal_error <error-format> [<format-args>...]
#
# Hands all arguments to error() and then terminates with exit status 1.
fatal_error() { error "$@"; exit 1; }

# Print usage message unless exactly one known upstream was given.
# NOTE(review): the `\b` in the format presumably rubs out the last `|` of
# the joined list when error() prints it -- confirm against libremessages.
if (( $# != 1 )) || ! in_array "$1" "${UPSTREAMS[@]}"; then
	fatal_error 'usage: %s {%s\b}' \
	            "${0##*/}" \
	            "$(printf -- ' %s |' "${UPSTREAMS[@]}")"
fi

readonly UPSTREAM=$1
source "$(dirname "$(readlink -e "$0")")/db-import-${UPSTREAM}.conf"

# Check variables presence.
# ARCHARCHES is part of the list because init() iterates over it: if it were
# empty nothing would be imported, yet the lastsync stamp would be refreshed.
# For arrays, ${!var} only looks at the first element.
vars=(DBEXT FILESEXT mirror mirrorpath WORKDIR FTP_BASE
      ARCHARCHES ARCHREPOS ARCHPKGPOOLS)
# Only the archlinux upstream syncs source pools.
[[ $UPSTREAM == archlinux ]] && vars+=(ARCHSRCPOOLS)
for var in "${vars[@]}"; do
	[[ -n ${!var} ]] || fatal_error "Empty %s" "${var}"
done

# usage: sync_dbs <from> <into>
#
# Mirrors <from> into <into> with rsync, transferring nothing but database
# files (*.db, *${DBEXT}, *.files, *${FILESEXT}); directories are included so
# rsync can descend, and -m prunes the ones that end up empty.
# TODO: we could be doing without things other than what is in
#       ${ARCHARCHES[@]} and ${ARCHREPOS[@]}
sync_dbs() {
	local -r src=$1 dest=$2
	local -a filters=(--include="*/")
	local suffix

	for suffix in .db "${DBEXT}" .files "${FILESEXT}"; do
		filters+=(--include="*${suffix}")
	done
	filters+=(--exclude="*")

	rsync "${extra[@]}" --no-motd -mrtlH --no-p \
		"${filters[@]}" \
		--delete-after \
		"$src" "$dest"
}

# usage: get_repo_workdir <repo> <arch>
#
# Prints (without a newline, with a trailing slash) the directory below
# ${WORKDIR} that holds the databases of the <repo> <arch> combination.
# The layout depends on ${UPSTREAM}; nothing is printed for an unknown one.
get_repo_workdir() {
	local -r repo=$1 arch=$2

	if [[ $UPSTREAM == archlinux ]]; then
		printf -- '%s' "${WORKDIR}/${repo}/os/${arch}/"
	elif [[ $UPSTREAM == archlinux32 || $UPSTREAM == archlinuxarm ]]; then
		printf -- '%s' "${WORKDIR}/${arch}/${repo}/"
	fi
}

# usage: get_repo_content <path-to-db>
#
# Lists the archive members of <path-to-db>, keeps only the part before the
# first `/` of each member and prints the sorted, de-duplicated result.
get_repo_content() {
	local -r db_path=$1

	bsdtar tf "${db_path}" |
		cut -d/ -f1 |
		sort -u
}

# usage: make_whitelist <output-file> <path-to-db> <blacklisted-pkg1> [...]
#
# Has 2 side effects:
#   1. Notably, overwrites <output-file> with the whitelist created from...
#   2. Cleaning <path-to-db> from <blacklisted-pkg1> [...] in the process.
#   2.1. repo-remove will also clean the corresponding .files db during 2.
#
# Every whitelist line is a database entry name followed by `*`.
make_whitelist() {
	local -r output_file=$1 db_file=$2
	# Quoted slice: package names reach repo-remove exactly as given, with
	# no word splitting and no glob expansion against the current directory.
	local -ra blacklist=("${@:3}")

	# Remove blacklisted packages and count them
	# TODO: capture all removed packages for printing on debug mode
	msg2 "Removing blacklisted packages from %s ..." "${db_file##*/}"
	# Only the "-> Removing" lines are shown, indented by one tab.
	# repo-remove's own exit status is not checked (sed ends the pipeline).
	LC_ALL=C repo-remove "$db_file" "${blacklist[@]}" |&
		sed -n $'s/-> Removing/\t&/p'

	# Get db contents, one entry per line, without glob expansion.
	local -a db=()
	mapfile -t db < <(get_repo_content "${db_file}")
	msg2 "%d packages in whitelist" "${#db[@]}"

	# Create a whitelist, add * wildcard to end.
	# FIXME: due to lack of -arch suffix, the pool sync retrieves
	# every arch even if we aren't syncing them.
	# IMPORTANT: the . in the sed command is needed because an empty
	# whitelist would consist of a single * allowing any package to
	# pass through.
	printf '%s\n' "${db[@]}" | sed "s|.$|&*|g" > "$output_file"
}

# usage: < <whitelist> filter_duplicates
#
# Don't import arch=(any) packages present elsewhere, it confuses parabolaweb.
# This reads a whitelist from stdin and prints it without said duplicates.
#
# The patterns are the names of the *-any packages found below
# ${FTP_BASE}/pool/, with everything from `-any.pkg` on replaced by `*`.
# grep applies them as unanchored basic regular expressions, not as exact
# lines.
# NOTE(review): that loose matching looks like what lets an Arch32 entry such
# as `foo-1.0-1.3*` be dropped for a pooled `foo-1.0-1-any` package -- confirm
# before tightening it with -x or -F.
#
# Returns 0 even when every line was filtered out; only a real grep failure
# (exit status above 1) is passed on to the caller.
filter_duplicates() {
	local rc=0

	grep -vf <(find "${FTP_BASE}/pool/" \
	                -name "*-any${PKGEXT}" \
	                -printf "%f\n" | sed  's/-any\.pkg.*/*/') -- || rc=$?

	# grep exits with 1 when it selected no line at all.  An empty result is
	# legitimate here and must not trip errexit or the ERR trap.
	if (( rc > 1 )); then
		return "$rc"
	fi
}

# usage: sync_pool <from> <path-to-whitelist> <into>
#
# Sync excluding everything but whitelist
sync_pool() {
	local -r _from=$1 _whitelist=$2 _into=$3

	mkdir -p -- "$_into"
	msg2 "Retrieving %d packages from %s pool" \
	     "$(wc -l "$_whitelist" | cut -d' ' -f1)" \
	     "$(basename "$_from")"

	# *Don't delete-after*, this is the job of
	# cleanup scripts. It will remove our packages too
	rsync "${extra[@]}" --no-motd -rtlH \
	      --delay-updates \
	      --safe-links \
	      --include-from="$_whitelist" \
	      --exclude="*" \
	      "$_from" \
	      "$_into"
}

# usage: sync_repo <from> <path-to-whitelist> <into>
#
# Sync excluding everything but whitelist.
# TODO: this is too similar to sync_pool(). Merge?
sync_repo() {
	local -r _from=$1 _whitelist=$2 _into=$3
	mkdir -p -- "$_into"
	msg2 "Retrieving %d files from repo" \
	     "$(wc -l "$_whitelist" | cut -d' ' -f1)"

	# We delete here for cleanup
	rsync "${extra[@]}" --no-motd -rtlH \
	      --delete-after \
	      --delete-excluded \
	      --delay-updates \
	      --include-from="$_whitelist" \
	      --exclude="*" \
	      "$_from" \
	      "$_into"
}

# usage: make_repo_symlinks <pool> <path-to-whitelist> <repo> <arch>
#
# Generate symbolic links to target packages <repo-whitelist> lying in
# some of our <pool>s, and put them in $FTP_BASE/<repo>/os/<arch>.
#
# Use this after `sync_pool`ing from an upstream with no pool(s) and
# therefore no symlinks inside <repo>/os/<arch>.
#
# Side effect: <path-to-whitelist> is overwritten with the full paths of the
# package symlinks that were created (a single empty line if there were none).
# Entries for which no package file plus `.sig` can be found are skipped with
# a warning.
make_repo_symlinks() {
	local -r pool=$1 whitelist=$2 repo=$3 arch=$4
	local -r pkgsuffix=.pkg.tar.xz

	msg2 "Putting symlinks in %s/os/%s" "${repo}" "${arch}"
	mkdir -p -- "${FTP_BASE}/${repo}/os/${arch}"

	local -a new_whitelist=() any_pkgs
	local pkgfile path symlink
	# Each whitelist entry `<name>-<ver>-<rel>*` is turned into the file name
	# `<name>-<ver>-<rel>-<arch>.pkg.tar.xz` before it is looked up.
	while IFS= read -r pkgfile; do
		path="${FTP_BASE}/${pool}/${pkgfile}"
		if [[ ! -f "$path" ]]; then
			# pkg was an `any.pkg.tar.?z`, find which pool it's in.
			# Only the trailing -<arch> is swapped for -any, so package
			# names that happen to contain the arch string stay intact.
			if [[ $pkgfile == *"-${arch}${pkgsuffix}" ]]; then
				pkgfile=${pkgfile%"-${arch}${pkgsuffix}"}-any${pkgsuffix}
			fi
			# HACK: Arch32 appends '.digits' to pkgrels. That
			# prevents us from finding the corresponding package.
			shopt -s extglob &&
				pkgfile=${pkgfile/.+([0-9])-any/-any} || :
			shopt -u extglob || :
			# Without a match the pattern stays literal and fails the
			# -f test below.
			any_pkgs=("${FTP_BASE}"/pool/*/"${pkgfile}")
			path="${any_pkgs[0]}"
		fi
		# give up
		if [[ ! (-f "$path" && -f "${path}.sig") ]]; then
			warning "No file was found for %s, skipping" \
			        "${pkgfile%-*}"
			continue
		fi
		symlink="${FTP_BASE}/${repo}/os/${arch}/${path##*/}"
		# Relative targets: <repo>/os/<arch> is three levels below FTP_BASE.
		ln -sfv "../../../pool/${path##*/pool/}" "$symlink"
		ln -sfv "../../../pool/${path##*/pool/}.sig" "${symlink}.sig"
		new_whitelist+=("$symlink")
	done < <(sed "s/*/-${arch}${pkgsuffix}/" "$whitelist")
	printf -- '%s\n' "${new_whitelist[@]}" > "$whitelist"
}

# usage: make_repo_dbs <repo> <arch>
#
# Deletes the databases of <repo> <arch> in the work directory, rebuilds them
# with repo-add from /tmp/<repo>-<arch>.whitelist and rsyncs the work
# directory into ${FTP_BASE}/<repo>/os/<arch>/.
make_repo_dbs() {
	local -r repo=$1 arch=$2
	local -r src_dir=$(get_repo_workdir "$repo" "$arch")/
	local -r dest_dir=${FTP_BASE}/${repo}/os/${arch}/
	local -r pkglist=/tmp/${repo}-${arch}.whitelist
	local -r db_file=${src_dir}/${repo}${DBEXT}
	local -r files_file=${src_dir}/${repo}${FILESEXT}

	# create fresh databases to reflect actual `any.pkg.tar.xz` packages.
	# this also avoids corrupt upstream metadata (ALARM)
	msg2 "Adding whitelisted packages to clean %s and %s ..." \
	     "${db_file##*/}" "${files_file##*/}"
	rm "$db_file" "$files_file"

	# The substitutions handed to repo-add stay unquoted on purpose: their
	# output is word-split, and for archlinux each `<dir><entry>*${PKGEXT}`
	# word is additionally glob-expanded into real file names.
	if [[ $UPSTREAM == archlinux ]]; then
		LC_ALL=C repo-add "$db_file" \
		         $(sed "s|^|${dest_dir}|; s|$|${PKGEXT}|" "$pkglist") |&
			sed -n 's/==> Adding/   -> Adding/p'
	elif [[ $UPSTREAM == archlinux32 || $UPSTREAM == archlinuxarm ]]; then
		LC_ALL=C repo-add "$db_file" $(cat "$pkglist") |&
			sed -n 's/==> Adding/   -> Adding/p'
	fi

	msg2 "Updating %s-%s databases" "$arch" "$repo"
	mkdir -p -- "$dest_dir"
	rsync "${extra[@]}" --no-motd -rtlH \
	      --delay-updates \
	      --safe-links \
	      "$src_dir" "$dest_dir"
}

# Main function. Process the databases and get the libre packages
# Outline:
# * Get repo.db from an Arch-like repo
# * Generate a list of available packages
# * Create sync whitelist (based on package blacklist)
# * Get packages and signatures
# * Create new repo.db with them
# * Sync repo.db => repo.db
#
# An arch-repo pair whose .db or .files database is missing from the work
# directory is skipped entirely: nothing is synced, linked or rebuilt for it,
# and the other pairs are unaffected.  Aborts when no pair is left.
# Whitelists live in /tmp/<repo>-<arch>.whitelist, the path make_repo_dbs()
# reads them from.
init() {
	# Get the blacklisted packages
	libreblacklist update
	# Split on whitespace: blank output lines are dropped instead of
	# turning into empty package names.
	local -a blacklist=($(libreblacklist cat | libreblacklist get-pkg))
	if (( ${#blacklist[@]} == 0 )); then
		fatal_error "Empty blacklist"
	fi
	msg2 "%d packages in blacklist" "${#blacklist[@]}"

	# Sync the repos databases
	msg 'Retrieving .db and .files files'
	sync_dbs "rsync://${mirror}/${mirrorpath}/" "$WORKDIR"

	# Traverse all repo-arch pairs.  done_arches[i]/done_repos[i] record the
	# pairs that were really processed; the later stages iterate over those
	# instead of the full ARCHARCHES x ARCHREPOS product.
	local -a done_arches=() done_repos=() whitelists=()
	local _arch _repo _file repodir db_file files_file whitelist missing
	for _arch in "${ARCHARCHES[@]}"; do
		for _repo in "${ARCHREPOS[@]}"; do
			msg "Processing %s-%s" "${_arch}" "${_repo}"
			repodir=$(get_repo_workdir "${_repo}" "${_arch}")
			db_file=${repodir}/${_repo}${DBEXT}
			files_file=${repodir}/${_repo}${FILESEXT}

			missing=false
			for _file in "$db_file" "$files_file"; do
				if [[ ! -f $_file ]]; then
					warning "%s doesn't exist, skipping this arch-repo" \
					        "$_file"
					missing=true
				fi
			done
			# Skipping must happen out here: a `continue` inside the loop
			# above would only move on to the next file.  Going on without
			# a database would produce an empty whitelist, and sync_repo()
			# deletes whatever its whitelist excludes.
			if [[ $missing == true ]]; then
				continue
			fi

			whitelist=/tmp/${_repo}-${_arch}.whitelist
			make_whitelist "$whitelist" \
			               "$db_file" \
			               "${blacklist[@]}"
			case "$UPSTREAM" in
			    archlinux)
				# Append to whitelists array so that we can
				# later sync_pool() all packages
				whitelists+=("$whitelist")
				# Get repo packages (symlinks)
				sync_repo \
				    "rsync://${mirror}/${mirrorpath}/${_repo}/os/${_arch}/" \
				    "$whitelist" \
				    "${FTP_BASE}/${_repo}/os/${_arch}/"
				;;
			    archlinux32|archlinuxarm)
				# Upstream doesn't use an $ARCHPKGPOOL
				filter_duplicates \
				    < "$whitelist" \
				    > "/tmp/${_repo}-${_arch}-nodups.whitelist"
				sync_pool "rsync://${mirror}/${mirrorpath}/${_arch}/${_repo}/" \
				          "/tmp/${_repo}-${_arch}-nodups.whitelist" \
				          "${FTP_BASE}/${ARCHPKGPOOLS}/"
				;;
			esac
			done_arches+=("$_arch")
			done_repos+=("$_repo")
		done
	done

	# Nothing usable was downloaded: stop before touching the pools, the
	# databases or the lastsync stamp.
	if (( ${#done_repos[@]} == 0 )); then
		fatal_error "No arch-repo pair could be processed"
	fi

	local i
	case "$UPSTREAM" in
	    archlinux)
		# Concatenate all whitelists, check for single *s just in case
		cat "${whitelists[@]}" | grep -v "^\*$" |
			sort -u > "/tmp/${UPSTREAM}-all.whitelist"
		# FIXME: make_whitelist() wildcards should be narrowed
		#        down to respect ${ARCHARCHES[@]}

		msg "Syncing package pools"
		local pkgpool
		for pkgpool in "${ARCHPKGPOOLS[@]}"; do
			sync_pool \
			    "rsync://${mirror}/${mirrorpath}/${pkgpool}/" \
			    "/tmp/${UPSTREAM}-all.whitelist" \
			    "${FTP_BASE}/${pkgpool}/"
		done

		msg "Syncing source pool"
		local srcpool
		for srcpool in "${ARCHSRCPOOLS[@]}"; do
			sync_pool \
			    "rsync://${mirror}/${mirrorpath}/${srcpool}/" \
			    "/tmp/${UPSTREAM}-all.whitelist" \
			    "${FTP_BASE}/${srcpool}/"
		done
		;;
	    archlinux32|archlinuxarm)
		msg "Generating symbolic links to pool"

		local _pkgpool
		for i in "${!done_repos[@]}"; do
			for _pkgpool in "${ARCHPKGPOOLS[@]}"; do
				make_repo_symlinks \
				    "$_pkgpool" \
				    "/tmp/${done_repos[i]}-${done_arches[i]}.whitelist" \
				    "${done_repos[i]}" \
				    "${done_arches[i]}"
			done
		done
		;;
	esac

	msg "Putting databases back in place"

	# FIXME: all repo DBs should be replaced at once (per architecture)
	for i in "${!done_repos[@]}"; do
		make_repo_dbs "${done_repos[i]}" "${done_arches[i]}"
	done

	date -u +%s > "${FTP_BASE}/lastsync"
}

# Entry point: everything above only defines helpers and validates the
# command line and configuration; the import itself runs here.
init
