#!/bin/bash
# Syncs Arch, ALARM or Arch32 repos based on info contained in the
# accompanying .conf files.
# License: GPLv3

# Abort on the first unhandled failing command (-e) and make functions,
# command substitutions and subshells inherit the ERR trap (-E).
set -eE
# Helper libraries; their paths are resolved through `librelib`.
source "$(librelib messages)"
source "$(librelib blacklist)"
source "$(librelib conf)"
setup_traps

# Upstream identifiers accepted as the single command-line argument.
# The brace expansion yields `archlinux32` and `archlinuxarm`.
readonly -a UPSTREAMS=(packages community archlinux{32,arm})

# usage: fetch_dbs <from> <into>
#
# Mirror <from> into <into>, transferring nothing but database files
# (*.db, *${DBEXT}, *.files, *${FILESEXT}) and the directories that
# contain them. Anything else found in <into> is deleted after the
# transfer. Additional rsync flags are taken from the global `extra`.
# TODO: we could be doing without things other than what is in
#       ${ARCHTAGS[@]}
fetch_dbs() {
	local -a rsync_args=(
		"${extra[@]}"
		--no-motd -mrtlH --no-p
		# Keep directories traversable, then whitelist the db files.
		--include="*/"
		--include="*.db"
		--include="*${DBEXT}"
		--include="*.files"
		--include="*${FILESEXT}"
		# Everything that was not included above is ignored.
		--exclude="*"
		--delete-after
	)

	rsync "${rsync_args[@]}" "$1" "$2"
}

# usage: get_repo_workdir <repo> <arch>
#
# Prints (without trailing newline) the directory below ${WORKDIR}/rsync
# that holds the databases of the given <repo> <arch> pair. The layout
# depends on the global $UPSTREAM:
#   packages, community       -> <repo>/os/<arch>/
#   archlinux32, archlinuxarm -> <arch>/<repo>/
# Nothing is printed for any other value of $UPSTREAM.
get_repo_workdir() {
	local repo_name=$1 arch_name=$2
	local subdir

	case "$UPSTREAM" in
		packages|community)
			subdir="${repo_name}/os/${arch_name}" ;;
		archlinux32|archlinuxarm)
			subdir="${arch_name}/${repo_name}" ;;
		*)
			return 0 ;;
	esac

	printf -- '%s' "${WORKDIR}/rsync/${subdir}/"
}

# usage: get_repo_content <path-to-db>
#
# Prints the sorted, de-duplicated first path component of every entry
# listed by `bsdtar tf <path-to-db>`, one per line.
get_repo_content() {
	# Only the part before the first "/" of each archive member is kept.
	sort -u < <(bsdtar tf "${1}" | cut -d "/" -f 1)
}

# usage: make_whitelist <output-file> <path-to-db>
#
# The names of the blacklisted packages are not taken from the argument
# list; they are read from `blacklist-cat | blacklist-get-pkg`.
#
# Has 2 side effects:
#   1. Overwrites <output-file> with the whitelist, one `<entry>*` line
#      per entry reported by get_repo_content for <path-to-db>.
#   2. Removes the blacklisted packages from <path-to-db> (via
#      repo-remove) before the whitelist is generated.
#   2.1. repo-remove will also clean the corresponding .files db during 2.
make_whitelist() {
	local -r output_file=$1 db_file=$2
	# Remove blacklisted packages and count them
	# TODO: capture all removed packages for printing on debug mode
	msg2 "Removing blacklisted packages from %s ..." "${db_file##*/}"
	# -r: with an empty blacklist there is nothing to remove, so do not
	# run repo-remove without any package name.
	blacklist-cat | blacklist-get-pkg |
		xargs -r -d '\n' repo-remove "$db_file"

	# Get db contents, one array element per output line (no word
	# splitting, no pathname expansion).
	local -a db=()
	mapfile -t db < <(get_repo_content "${db_file}")
	msg2 "%d packages in whitelist" "${#db[@]}"

	# Create a whitelist, add * wildcard to end.
	# FIXME: due to lack of -arch suffix, the pool sync retrieves
	# every arch even if we aren't syncing them.
	# IMPORTANT: the . in the sed command is needed because an empty
	# whitelist would consist of a single * allowing any package to
	# pass through.
	printf '%s\n' "${db[@]}" | sed "s|.$|&*|g" > "$output_file"
}

# usage: < <whitelist> filter_duplicates
#
# Don't import arch=(any) packages present elsewhere, it confuses parabolaweb.
# This reads a whitelist from stdin and prints it without said duplicates.
#
# Every `*-any${PKGEXT}` file below ${FTP_BASE}/pool/ is turned into a
# pattern by replacing its `-any.pkg...` tail with `*`. grep interprets
# these patterns as unanchored basic regular expressions, and every
# whitelist line matching one of them is dropped.
#
# Returns 0 even when every line was filtered out (or stdin was empty);
# a real grep failure (exit status > 1) is propagated.
filter_duplicates() {
	local rc=0

	grep -vf <(find "${FTP_BASE}/pool/" \
			-name "*-any${PKGEXT}" \
			-printf "%f\n" | sed 's/-any\.pkg.*/*/') -- || rc=$?

	# grep exits with 1 when it selected no line at all. That is a valid
	# result here and must not abort a caller running under `set -e`.
	(( rc <= 1 )) || return "$rc"
}

# usage: sync_pool <from> <path-to-whitelist> <into>
#
# Creates <into> if needed and rsyncs from <from> only the files that
# match a pattern listed in <path-to-whitelist>. Nothing is ever deleted
# from <into>. Additional rsync flags are taken from the global `extra`.
sync_pool() {
	local -r source_url=$1 whitelist_file=$2 target_dir=$3
	local -a rsync_args=(
		"${extra[@]}" --no-motd -rtlH
		--delay-updates
		--safe-links
		# Whitelisted patterns first, then reject everything else.
		--include-from="$whitelist_file"
		--exclude="*"
	)

	mkdir -p -- "$target_dir"
	msg2 "Retrieving %d packages from %s pool" \
		"$(wc -l < "$whitelist_file")" "$(basename "$source_url")"

	# *Don't delete-after*, this is the job of
	# cleanup scripts. It will remove our packages too
	rsync "${rsync_args[@]}" "$source_url" "$target_dir"
}

# usage: sync_repo <from> <path-to-whitelist> <into>
#
# Creates <into> if needed and rsyncs from <from> only the files that
# match a pattern listed in <path-to-whitelist>. Unlike sync_pool(),
# files in <into> that are gone upstream or are not whitelisted are
# deleted. Additional rsync flags are taken from the global `extra`.
# TODO: this is too similar to sync_pool(). Merge?
sync_repo() {
	local -r source_url=$1 whitelist_file=$2 target_dir=$3
	local -a rsync_args=(
		"${extra[@]}" --no-motd -rtlH
		# We delete here for cleanup
		--delete-after
		--delete-excluded
		--delay-updates
		# Whitelisted patterns first, then reject everything else.
		--include-from="$whitelist_file"
		--exclude="*"
	)

	mkdir -p -- "$target_dir"
	msg2 "Retrieving %d files from repo" "$(wc -l < "$whitelist_file")"

	rsync "${rsync_args[@]}" "$source_url" "$target_dir"
}

# usage: make_repo_symlinks <pool> <path-to-whitelist> <repo> <arch>
#
# Generate symbolic links to target packages <repo-whitelist> lying in
# some of our <pool>s, and put them in $FTP_BASE/<repo>/os/<arch>.
#
# Use this after `sync_pool`ing from an upstream with no pool(s) and
# therefore no symlinks inside <repo>/os/<arch>.
#
# Every `<name>*` whitelist entry is looked up as
# `<name>-<arch>.pkg.tar.xz` in $FTP_BASE/<pool>; when it is not there,
# the `-any` variant is searched in every $FTP_BASE/pool/*/ directory.
# A package is linked only if both it and its `.sig` exist; otherwise a
# warning is printed and the entry is skipped.
#
# Side effect: <path-to-whitelist> is overwritten with the full paths of
# the package symlinks that were created, one per line.
#
# NOTE: the `.pkg.tar.xz` extension is hard-coded here.
make_repo_symlinks() {
	local -r pool=$1 whitelist=$2 repo=$3 arch=$4
	local -r repo_dir="${FTP_BASE}/${repo}/os/${arch}"
	local -r arch_suffix="-${arch}.pkg.tar.xz"
	# Matches an `any` package whose pkgrel ends in `.<digits>`.
	local -r arch32_rel_re='^(.*)\.[0-9]+(-any\.pkg\.tar\.xz)$'

	msg2 "Putting symlinks in %s/os/%s" "$repo" "$arch"
	mkdir -p -- "$repo_dir"

	local -a new_whitelist=() any_pkgs=()
	local pkgfile path symlink
	while IFS= read -r pkgfile; do
		# A blank whitelist line names no package.
		[[ -n $pkgfile ]] || continue

		path="${FTP_BASE}/${pool}/${pkgfile}"
		if [[ ! -f "$path" ]]; then
			# pkg was an `any.pkg.tar.?z`, find which pool it's in.
			# Only the trailing arch tag is swapped, so package
			# names or versions containing <arch> stay intact.
			if [[ $pkgfile == *"$arch_suffix" ]]; then
				pkgfile="${pkgfile%"$arch_suffix"}-any.pkg.tar.xz"
			fi
			# HACK: Arch32 appends '.digits' to pkgrels. That
			# prevents us from finding the corresponding package.
			if [[ $pkgfile =~ $arch32_rel_re ]]; then
				pkgfile="${BASH_REMATCH[1]}${BASH_REMATCH[2]}"
			fi
			# An unmatched glob stays literal and fails the -f
			# test below.
			any_pkgs=("${FTP_BASE}"/pool/*/"${pkgfile}")
			path="${any_pkgs[0]}"
		fi
		# give up
		if [[ ! (-f "$path" && -f "${path}.sig") ]]; then
			warning "No file was found for %s, skipping" \
				"${pkgfile%-*}"
			continue
		fi
		symlink="${repo_dir}/${path##*/}"
		# Links are relative: <repo>/os/<arch> is three levels below
		# $FTP_BASE, next to which pool/ lives.
		ln -sfv "../../../pool/${path##*/pool/}" "$symlink"
		ln -sfv "../../../pool/${path##*/pool/}.sig" "${symlink}.sig"
		new_whitelist+=("$symlink")
	done < <(sed "s/*/-${arch}.pkg.tar.xz/" "$whitelist")
	printf -- '%s\n' "${new_whitelist[@]}" > "$whitelist"
}

# usage: make_repo_dbs <repo> <arch>
#
# Rebuilds the <repo> database inside the work directory from the
# packages named in ${WORKDIR}/<repo>-<arch>.whitelist and then rsyncs
# the work directory into ${FTP_BASE}/<repo>/os/<arch>/.
#
# How the whitelist is read depends on the global $UPSTREAM:
#   packages, community       -> each line is a `name*` pattern; it is
#                                prefixed with the target directory,
#                                suffixed with ${PKGEXT} and expanded by
#                                the shell
#   archlinux32, archlinuxarm -> the whitelist content is used as the
#                                list of package files
make_repo_dbs() {
	local -r _repo=$1 _arch=$2
	local -r staging=$(get_repo_workdir "$_repo" "$_arch")/
	local -r target=${FTP_BASE}/${_repo}/os/${_arch}/
	local -r db_path=${staging}/${_repo}${DBEXT}
	local -r files_path=${staging}/${_repo}${FILESEXT}
	local -r list=${WORKDIR}/${_repo}-${_arch}.whitelist
	local -a rsync_args=(
		"${extra[@]}" --no-motd -rtlpH
		--delay-updates
		--safe-links
	)

	# create fresh databases to reflect actual `any.pkg.tar.xz` packages.
	# this also avoids corrupt upstream metadata (ALARM)
	msg2 "Adding whitelisted packages to clean %s and %s ..." \
		"${db_path##*/}" "${files_path##*/}"
	rm "$db_path" "$files_path"

	local pkgfiles=()
	if [[ $UPSTREAM == packages || $UPSTREAM == community ]]; then
		# Deliberately unquoted: the shell has to expand the
		# resulting `<target><name>*${PKGEXT}` patterns.
		pkgfiles=($(sed "s|^|${target}|; s|$|${PKGEXT}|" "$list"))
	elif [[ $UPSTREAM == archlinux32 || $UPSTREAM == archlinuxarm ]]; then
		pkgfiles=($(cat "$list"))
	fi

	# repo-add runs with umask 002; the previous mask is restored after.
	local saved_umask=$(umask)
	umask 002
	repo-add "$db_path" "${pkgfiles[@]}"
	umask "$saved_umask" >/dev/null

	msg2 "Updating %s-%s databases" "$_arch" "$_repo"
	mkdir -p -- "$target"
	rsync "${rsync_args[@]}" "$staging" "$target"
}

# Main function. Process the databases and get the libre packages
#
# usage: main <upstream>
#   <upstream> must be one of ${UPSTREAMS[@]}; anything else prints a
#   usage message and exits with $EXIT_INVALIDARGUMENT.
#
# Globals written: VERBOSE, extra, UPSTREAM (all made readonly), WORKDIR
# (a fresh temporary directory, removed again by an EXIT trap).
#
# Outline:
#  1. Fetch package info
#     * Get blacklist.txt
#     * Get repo.db from an Arch-like repo
#  2. Fetch the packages we want
#     * Create sync whitelist (based on package blacklist)
#     * Call sync_repo and/or sync_pool to fetch packages and signatures
#  3. Put the packages in the repos
#     * Create new repo.db with them (repo-add)
#     * rsync scratch directory => repos
main() {
	##############################################################
	# 0. Initialization                                          #
	##############################################################

	# Run as `V=true db-import-pkg` to get verbose output
	readonly VERBOSE=${V}
	extra=()
	# Compare explicitly: running an empty ${VERBOSE} as a command
	# succeeds, which would switch verbose mode on whenever V is unset.
	if [[ $VERBOSE == true ]]; then
		extra+=(-v)
	fi
	readonly extra
	readonly UPSTREAM=$1

	# Print usage message
	if [[ $# -ne 1 ]] || ! in_array "$UPSTREAM" "${UPSTREAMS[@]}" ; then
		# "${UPSTREAMS[*]}" joins its elements with the first IFS char.
		local IFS='|'
		msg 'usage: [V=true] %s {%s}' "${0##*/}" "${UPSTREAMS[*]}"
		exit $EXIT_INVALIDARGUMENT
	fi

	# Only upstreams with a source pool need ARCHSRCPOOL.
	local -a vars=()
	case "$UPSTREAM" in
		packages|community)       vars=(ARCHMIRROR ARCHTAGS ARCHPKGPOOL ARCHSRCPOOL) ;;
		archlinux32|archlinuxarm) vars=(ARCHMIRROR ARCHTAGS ARCHPKGPOOL            ) ;;
	esac
	# Configuration files live next to the (symlink-resolved) script.
	local config_dir
	config_dir=$(dirname "$(readlink -e "$0")")
	load_conf "${config_dir}/config" DBEXT FILESEXT FTP_BASE
	load_conf "${config_dir}/db-import-${UPSTREAM}.conf" "${vars[@]}"

	WORKDIR=$(mktemp -dt "${0##*/}.XXXXXXXXXX")
	trap "rm -rf -- ${WORKDIR@Q}" EXIT

	##############################################################
	# 1. Fetch package info                                      #
	##############################################################

	# Get the blacklisted packages
	blacklist-update

	# Sync the repos databases
	msg 'Retrieving .db and .files files'
	mkdir "${WORKDIR}/rsync"
	fetch_dbs "${ARCHMIRROR}/" "$WORKDIR/rsync"

	##############################################################
	# 2. Fetch the packages we want                              #
	##############################################################

	# Traverse all repo-arch pairs
	local -a whitelists=()
	local _tag _repo _arch _dir db_file whitelist
	for _tag in "${ARCHTAGS[@]}"; do
		# A tag has the form <repo>-<arch>; split at the last dash.
		_repo=${_tag%-*}
		_arch=${_tag##*-}
		whitelist="${WORKDIR}/${_repo}-${_arch}.whitelist"
		msg "Processing %s-%s" "${_arch}" "${_repo}"
		_dir=$(get_repo_workdir "${_repo}" "${_arch}")
		db_file="${_dir}/${_repo}${DBEXT}"
		make_whitelist "$whitelist" "$db_file"
		case "$UPSTREAM" in
			packages|community)
				# Append to whitelists array so that we can
				# later sync_pool() all packages
				whitelists+=("$whitelist")
				# Get repo packages (symlinks)
				sync_repo \
					"${ARCHMIRROR}/${_repo}/os/${_arch}/" \
					"$whitelist" \
					"${FTP_BASE}/${_repo}/os/${_arch}/"
				;;
			archlinux32|archlinuxarm)
				# Upstream doesn't use an $ARCHPKGPOOL
				filter_duplicates \
					< "$whitelist" \
					> "${whitelist%.whitelist}-nodups.whitelist"
				sync_pool "${ARCHMIRROR}/${_arch}/${_repo}/" \
					"${whitelist%.whitelist}-nodups.whitelist" \
					"${FTP_BASE}/${ARCHPKGPOOL}/"
				;;
		esac
	done

	case "$UPSTREAM" in
		packages|community)
			# Concatenate all whitelists, check for single *s just in case
			cat "${whitelists[@]}" | grep -v "^\*$" |
				sort -u > "${WORKDIR}/${UPSTREAM}-all.whitelist"
			# FIXME: make_whitelist() wildcards should be narrowed
			#        down to respect the architecture of the tag

			msg "Syncing package pool"
			sync_pool \
				"${ARCHMIRROR}/${ARCHPKGPOOL}/" \
				"${WORKDIR}/${UPSTREAM}-all.whitelist" \
				"${FTP_BASE}/${ARCHPKGPOOL}/"

			msg "Syncing source pool"
			sync_pool \
				"${ARCHMIRROR}/${ARCHSRCPOOL}/" \
				"${WORKDIR}/${UPSTREAM}-all.whitelist" \
				"${FTP_BASE}/${ARCHSRCPOOL}/"
			;;
		archlinux32|archlinuxarm)
			msg "Generating symbolic links to pool"

			for _tag in "${ARCHTAGS[@]}"; do
				_repo=${_tag%-*}
				_arch=${_tag##*-}
				make_repo_symlinks \
					"$ARCHPKGPOOL" \
					"${WORKDIR}/${_repo}-${_arch}.whitelist" \
					"$_repo" \
					"$_arch"
			done
			;;
	esac

	##############################################################
	# 3. Put the packages in the repos                           #
	##############################################################

	msg "Putting databases back in place"

	# FIXME: all repo DBs should be replaced at once (per architecture)
	for _tag in "${ARCHTAGS[@]}"; do
		_repo=${_tag%-*}
		_arch=${_tag##*-}
		make_repo_dbs "$_repo" "$_arch"
	done
}

# Entry point: forward the command-line arguments unchanged to main().
main "$@"
