#!/bin/bash
# Syncs Arch, ALARM or Arch32 repos based on info contained in the
# accompanying .conf files.
# License: GPLv3

# Abort on the first unhandled command failure; -E makes an ERR trap
# be inherited by functions, command substitutions and subshells.
set -eE
# Load the helper libraries. `librelib <name>` prints the path of the
# library file, which is then sourced into this shell.
source "$(librelib messages)"
source "$(librelib blacklist)"
source "$(librelib conf)"
# Not defined in this file; presumably provided by one of the libraries
# sourced above -- TODO confirm which one.
setup_traps

# Upstream names accepted as the script's single command-line argument.
declare -ra UPSTREAMS=(
	packages
	community
	archlinux32
	archlinuxarm
)

# usage: fetch_dbs <from> <into>
#
# Mirror <from> into <into> with rsync, transferring nothing but the
# database files (*.db, *${DBEXT}, *.files, *${FILESEXT}).  Directories
# are traversed, empty ones are pruned (-m), and anything on the
# receiving side that is no longer upstream is deleted afterwards.
# Extra rsync flags are taken from the global array `extra`.
#
# TODO: we could be doing without things other than what is in
#       ${ARCHTAGS[@]}
fetch_dbs() {
	local -r src=$1 dest=$2

	# Order matters to rsync: the first matching rule wins, so the
	# catch-all exclude has to come last.
	local -a filters=(
		--include="*/"
		--include="*.db"
		--include="*${DBEXT}"
		--include="*.files"
		--include="*${FILESEXT}"
		--exclude="*"
	)

	rsync "${extra[@]}" --no-motd -mrtlH --no-p \
		"${filters[@]}" \
		--delete-after \
		"$src" "$dest"
}

# usage: get_repo_dir <repo> <arch>
#
# Prints the repo directory path for the given <repo> <arch>
# combination, relative to the rsync root.  The path is produced by
# substituting $repo and $arch (and only those two) in the template
# stored in the global ARCHMIRROR_path.
get_repo_dir() {
	# The assignments are scoped to the envsubst invocation only.
	printf '%s\n' "$ARCHMIRROR_path" |
		repo="$1" arch="$2" envsubst '$repo $arch'
}

# usage: db_list_pkgs <path-to-db>
#
# Prints the packages recorded in the database archive <path-to-db>,
# one per line, sorted and de-duplicated, in the format:
#
#     pkgname [epoch:]pkgver-pkgrel
db_list_pkgs() {
	local -r db=$1

	# Archive members look like "<pkgname>-<ver>-<rel>/desc": keep the
	# leading directory name, then turn the dash in front of the last
	# two dash-separated fields into a space.
	bsdtar tf "$db" |
		sed -E \
			-e 's|/.*$||' \
			-e 's/-([^-]*-[^-]*)$/ \1/' |
		sort -u
}

# usage: filter_blacklisted <FULL_LIST >WHITE_LIST
#
# Reads a package list from stdin in the format:
#
#     pkgname [epoch:]pkgver-pkgrel
#
# and prints it, sorted and de-duplicated, without the lines whose
# pkgname is produced by `blacklist-cat | blacklist-get-pkg`.
filter_blacklisted() {
	# join -v 1 prints only the lines of the first input (stdin) whose
	# first field has no partner in the second input (the blacklist).
	sort -u |
		join -v 1 - <(
			blacklist-cat |
				blacklist-get-pkg |
				sort -u
		)
}

# usage: < <whitelist> filter_duplicates
#
# Don't import arch=(any) packages present elsewhere, it confuses parabolaweb.
# This reads a whitelist from stdin and prints it without said duplicates.
#
# The names of all "*-any${PKGEXT}" files below ${FTP_BASE}/pool/ are
# turned into "<pkgname>-<ver>-<rel>*" patterns, and whitelist lines
# matching any of them are dropped.
#
# NOTE(review): grep treats these patterns as unanchored basic regular
# expressions, not as fixed strings, so matching is looser than an
# exact comparison.  That is kept as-is here.
#
# Returns 0 even when nothing is left to print; a grep failure other
# than "no lines selected" is passed on to the caller.
filter_duplicates() {
	local status=0

	grep -vf <(find "${FTP_BASE}/pool/" \
	                -name "*-any${PKGEXT}" \
	                -printf "%f\n" | sed 's/-any\.pkg.*/*/') -- || status=$?

	# grep exits 1 when it selects no lines, which is a perfectly valid
	# outcome here (empty whitelist, or every entry is a duplicate) and
	# must not abort a caller running under `set -e`.  Anything above 1
	# is a real error.
	if (( status > 1 )); then
		return "$status"
	fi
}

# usage: sync_pool <from> <path-to-whitelist> <into>
#
# Copy from the rsync location <from> into the directory <into>
# (created if missing) only what matches the include patterns listed
# in <path-to-whitelist>; everything else is excluded.
sync_pool() {
	local -r src_url=$1 include_list=$2 dest_dir=$3

	# Nothing is ever deleted here: removing stale files is the job of
	# the cleanup scripts, and a delete pass would remove our own
	# packages too.
	local -a rsync_opts=(
		--no-motd -rtlH
		--delay-updates
		--safe-links
		--include-from="$include_list"
		--exclude="*"
	)

	mkdir -p -- "$dest_dir"
	msg2 "Retrieving %d packages from %s pool" \
		"$(wc -l < "$include_list")" \
		"$(basename "$src_url")"

	rsync "${extra[@]}" "${rsync_opts[@]}" "$src_url" "$dest_dir"
}

# usage: sync_repo <from> <path-to-whitelist> <into>
#
# Copy from the rsync location <from> into the directory <into>
# (created if missing) only what matches the include patterns listed
# in <path-to-whitelist>.  Unlike sync_pool(), this also cleans up:
# files in <into> that are excluded or gone upstream are deleted.
# TODO: this is too similar to sync_pool(). Merge?
sync_repo() {
	local -r src_url=$1 include_list=$2 dest_dir=$3

	# The two --delete-* options are what make this a cleanup pass.
	local -a rsync_opts=(
		--no-motd -rtlH
		--delete-after
		--delete-excluded
		--delay-updates
		--include-from="$include_list"
		--exclude="*"
	)

	mkdir -p -- "$dest_dir"
	msg2 "Retrieving %d files from repo" \
		"$(wc -l < "$include_list")"

	rsync "${extra[@]}" "${rsync_opts[@]}" "$src_url" "$dest_dir"
}

# usage: make_repo_symlinks <pool> <path-to-whitelist> <repo> <arch>
#
# Generate symbolic links to target packages <repo-whitelist> lying in
# some of our <pool>s, and put them in
# ${WORKDIR}/staging-rsync/<repo>-<arch>/ (the staging copy of
# $FTP_BASE/<repo>/os/<arch>).
#
# Use this after `sync_pool`ing from an upstream with no pool(s) and
# therefore no symlinks inside <repo>/os/<arch>.
#
# Arguments:
#   <pool>               pool directory, relative to $FTP_BASE
#   <path-to-whitelist>  file of "<pkgname>-<ver>-<rel>*" patterns
#   <repo> <arch>        repository name and architecture
#
# Side effect: <path-to-whitelist> is overwritten with the paths of the
# package symlinks that were created, one per line.  It is left empty
# when no package could be resolved.
make_repo_symlinks() {
	local -r pool=$1 whitelist=$2 repo=$3 arch=$4

	local -r repodir="${WORKDIR}/staging-rsync/${repo}-${arch}"
	local -a new_whitelist=()

	msg2 "Putting symlinks in ${repo}/os/${arch}"
	mkdir -p -- "${repodir}"

	# Remember the caller's extglob setting so that it can be put back
	# after the temporary use below, instead of always switching it off.
	local extglob_was_set=false
	if shopt -q extglob; then
		extglob_was_set=true
	fi

	local pkgfile path symlink
	local -a any_pkgs
	while IFS= read -r pkgfile; do
		path="${FTP_BASE}/${pool}/${pkgfile}"
		if [[ ! -f "$path" ]]; then
			# pkg was an `any.pkg.tar.?z`, find which pool it's in.
			pkgfile=${pkgfile/${arch}/any}
			# HACK: Arch32 appends '.digits' to pkgrels. That
			# prevents us from finding the corresponding package.
			shopt -s extglob
			pkgfile=${pkgfile/.+([0-9])-any/-any}
			if [[ $extglob_was_set == false ]]; then
				shopt -u extglob
			fi
			# Only the pool directory is a wildcard; the variable
			# parts are quoted so they are taken literally.  With no
			# match the pattern stays as-is and fails the -f test.
			any_pkgs=("${FTP_BASE}"/pool/*/"${pkgfile}")
			path="${any_pkgs[0]}"
		fi
		# give up
		if [[ ! (-f "$path" && -f "${path}.sig") ]]; then
			warning "No file was found for %s, skipping" \
				"${pkgfile%-*}"
			continue
		fi
		symlink="${repodir}/${path##*/}"
		# Links are relative so that they stay valid once the staging
		# directory has been copied to $FTP_BASE/<repo>/os/<arch>.
		ln -sfv "../../../pool/${path##*/pool/}" "$symlink"
		ln -sfv "../../../pool/${path##*/pool/}.sig" "${symlink}.sig"
		new_whitelist+=("$symlink")
	done < <(sed "s/*/-${arch}.pkg.tar.xz/" "$whitelist")

	# printf with no arguments would still emit one blank line, so an
	# empty result is written as an empty file.
	if (( ${#new_whitelist[@]} > 0 )); then
		printf -- '%s\n' "${new_whitelist[@]}" > "$whitelist"
	else
		: > "$whitelist"
	fi
}

# usage: make_repo_dbs <repo> <arch>
#
# Build a fresh database for <repo>/<arch> inside the staging directory
# ${WORKDIR}/staging-rsync/<repo>-<arch> from the packages named by
# ${WORKDIR}/<repo>-<arch>.whitelist, then rsync the staging directory
# over ${FTP_BASE}/<repo>/os/<arch>/.
make_repo_dbs() {
	local -r db_repo=$1 db_arch=$2
	local -r staging=${WORKDIR}/staging-rsync/${db_repo}-${db_arch}
	local -r target=${FTP_BASE}/${db_repo}/os/${db_arch}/
	local -r db_path=${staging}/${db_repo}${DBEXT}
	local -r files_path=${staging}/${db_repo}${FILESEXT}
	local -r list=${WORKDIR}/${db_repo}-${db_arch}.whitelist

	# create fresh databases to reflect actual `any.pkg.tar.xz` packages.
	# this also avoids corrupt upstream metadata (ALARM)
	msg2 "Adding whitelisted packages to clean DBs ..."
	local pkgs=()
	if [[ $UPSTREAM == packages || $UPSTREAM == community ]]; then
		# The whitelist holds "<name>-<ver>-<rel>*" patterns.  They are
		# turned into paths below the target directory and left
		# unquoted on purpose, so that the shell expands the wildcard.
		pkgs=($(sed "s|^|${target}|; s|$|${PKGEXT}|" "$list"))
	elif [[ $UPSTREAM == archlinux32 || $UPSTREAM == archlinuxarm ]]; then
		# Here the whitelist already contains one path per line.
		pkgs=($(cat "$list"))
	fi

	# Have repo-add create its files with a umask of 002, then restore
	# the previous umask.
	local saved_umask=$(umask)
	umask 002
	repo-add "$db_path" "${pkgs[@]}"
	umask "$saved_umask" >/dev/null

	mkdir -p -- "$target"
	# This bit is based on db-functions:set_repo_permission()
	local owner_group=$(/usr/bin/stat --printf='%G' "${target}")
	local dbf
	for dbf in "${db_path}" "${files_path}"; do
		chgrp "$owner_group" "$dbf"
	done
	for dbf in "${db_path}" "${files_path}"; do
		chmod g+w "$dbf"
	done

	msg2 "Updating %s-%s databases" "$db_arch" "$db_repo"
	local -a rsync_opts=(
		--no-motd -rtlpH
		--delay-updates
		--delete-after
		--links
	)
	rsync "${extra[@]}" "${rsync_opts[@]}" "$staging/" "$target"
}

# Main function. Process the databases and get the libre packages
#
# usage: main <upstream>
#
# <upstream> must be one of ${UPSTREAMS[@]}.  Verbose rsync output is
# enabled only when the environment variable V is exactly `true`.
#
# Outline:
#  1. Fetch package info
#     * Get blacklist.txt
#     * Get repo.db from an Arch-like repo
#  2. Figure out what we want
#     * Generate textfiles describing the current repo state, and
#       (using blacklist.txt) the desired repo state
#  3. Fetch the packages we want
#     * Create sync whitelist (based on package blacklist)
#     * Call sync_repo and/or sync_pool to fetch packages and signatures
#  4. Put the packages in the repos
#     * Create new repo.db with them (repo-add)
#     * rsync scratch directory => repos
main() {
	##############################################################
	# 0. Initialization                                          #
	##############################################################

	# Run as `V=true db-import-pkg` to get verbose output.
	#
	# The value is compared as a string rather than executed as a
	# command: an unset or empty V would otherwise expand to an empty
	# command, whose exit status is 0, and switch verbose mode on.
	readonly VERBOSE=${V:-false}
	extra=()
	if [[ $VERBOSE == true ]]; then
		extra+=(-v)
	fi
	readonly extra
	readonly UPSTREAM=$1

	# Print usage message
	if [[ $# -ne 1 ]] || ! in_array "$UPSTREAM" "${UPSTREAMS[@]}" ; then
		# Makes "${UPSTREAMS[*]}" join the names with '|'.
		IFS='|'
		msg 'usage: [V=true] %s {%s}' "${0##*/}" "${UPSTREAMS[*]}"
		exit $EXIT_INVALIDARGUMENT
	fi

	# Configuration variables to load for the chosen upstream.
	local vars
	case "$UPSTREAM" in
		packages|community)       vars=(ARCHMIRROR ARCHTAGS ARCHPKGPOOL ARCHSRCPOOL) ;;
		archlinux32|archlinuxarm) vars=(ARCHMIRROR ARCHTAGS ARCHPKGPOOL            ) ;;
	esac
	# Both configuration files live next to this script.
	load_conf "$(dirname "$(readlink -e "$0")")/config" DBEXT FILESEXT FTP_BASE
	load_conf "$(dirname "$(readlink -e "$0")")/db-import-${UPSTREAM}.conf" "${vars[@]}"

	# Split ARCHMIRROR into "rsync://<host>/<module>" and the path
	# template that follows it.
	declare -rg ARCHMIRROR_path="${ARCHMIRROR#rsync://*/*/}"
	declare -rg ARCHMIRROR_fullmodule="${ARCHMIRROR%"/${ARCHMIRROR_path}"}"

	# Scratch directory, removed when the script exits.
	WORKDIR=$(mktemp -dt "${0##*/}.XXXXXXXXXX")
	trap "rm -rf -- ${WORKDIR@Q}" EXIT

	##############################################################
	# 1. Fetch package info                                      #
	##############################################################

	# Get the blacklisted packages
	blacklist-update

	# Sync the repos databases
	msg 'Downloading .db and .files files'
	mkdir "${WORKDIR}/rsync"
	fetch_dbs "${ARCHMIRROR_fullmodule}/" "$WORKDIR/rsync"

	##############################################################
	# 2. Figure out what we want                                 #
	##############################################################

	mkdir "${WORKDIR}"/{old,new,dif}
	local _tag _repo _arch db_file
	for _tag in "${ARCHTAGS[@]}"; do
		# A tag is "<repo>-<arch>"; split it at the last dash.
		_repo=${_tag%-*}
		_arch=${_tag##*-}
		# FIXME: this assumes that the local DBEXT and the
		# imported DBEXT are the same, which is potentially
		# not true.
		#
		# FIXME: this should use db-functions to lock the
		# repos while we read them.
		db_file="${FTP_BASE}/${_repo}/os/${_arch}/${_repo}${DBEXT}"
		db_list_pkgs "$db_file" > "${WORKDIR}/old/${_tag}.txt"

		db_file="${WORKDIR}/rsync/$(get_repo_dir "${_repo}" "${_arch}")/${_repo}${DBEXT}"
		db_list_pkgs "$db_file" | filter_blacklisted > "${WORKDIR}/new/${_tag}.txt"
	done

	# We now have $WORKDIR/old/ describing the way the repos are,
	# and $WORKDIR/new/ describing the way we want them to be.  We
	# now create $WORKDIR/dif/ describing how to get from point A
	# to point B.
	#
	# TODO: finish this section
	for _tag in "${ARCHTAGS[@]}"; do
		comm -23 "${WORKDIR}"/{old,new}/"${_tag}.txt" # take packages that have been "removed"
	done | grep -rFx -f /dev/stdin "${WORKDIR}/new/" | # but now appear in another repo
		sort -u > "${WORKDIR}/dif/moved.txt"
	comm -23 \
		<(cat "${WORKDIR}"/old/* | cut -d' ' -f1 | sort -u) \
		<(cat "${WORKDIR}"/new/* | cut -d' ' -f1 | sort -u) \
		> "${WORKDIR}/dif/removed.txt"

	##############################################################
	# 3. Fetch the packages we want                              #
	##############################################################

	# OK, now we have $WORKDIR/old/ describing the way the repos
	# are, $WORKDIR/new/ describing the way we want them to be,
	# and $WORKDIR/dif/ describing how to get from `old` to `new`.
	# We should (TODO) now use db-move, db-update, and db-remove
	# to apply that diff.
	#
	# But,
	#  - db-move is broken
	#  - The code that populates /dif/ isn't finished
	# So, just nuke the current repos and entirely re-create
	# everything from /new/.

	mkdir "${WORKDIR}/staging-rsync"

	local whitelists=()
	local reponame
	for _tag in "${ARCHTAGS[@]}"; do
		reponame=${_tag%-*}
		_arch=${_tag##*-}
		msg "Processing %s-%s" "${_arch}" "${reponame}"
		# Create a whitelist, add * wildcard to end.
		#
		# FIXME: due to lack of -arch suffix, the pool sync
		# retrieves every arch even if we aren't syncing them.
		#
		# IMPORTANT: the . in the sed command is needed
		# because an empty whitelist would consist of a single
		# * allowing any package to pass through.
		sed -e 's/ /-/' -e 's|.$|&*|g' <"${WORKDIR}/new/${_tag}.txt" >"${WORKDIR}/${reponame}-${_arch}.whitelist"
		case "$UPSTREAM" in
			packages|community)
				# Append to whitelists array so that we can
				# later sync_pool() all packages
				whitelists+=("${WORKDIR}/${reponame}-${_arch}.whitelist")
				# Get repo packages (symlinks)
				mkdir "${WORKDIR}/staging-rsync/${_tag}"
				# Seed the staging directory with hard links to
				# what the repo currently contains.
				ln -t "${WORKDIR}/staging-rsync/${_tag}" \
					"${FTP_BASE}/${reponame}/os/${_arch}"/*
				sync_repo \
					"${ARCHMIRROR_fullmodule}/$(get_repo_dir "${reponame}" "${_arch}")/" \
					"${WORKDIR}/${reponame}-${_arch}.whitelist" \
					"${WORKDIR}/staging-rsync/${_tag}/"
				;;
			archlinux32|archlinuxarm)
				# Upstream doesn't use an $ARCHPKGPOOL
				filter_duplicates \
					< "${WORKDIR}/${reponame}-${_arch}.whitelist" \
					> "${WORKDIR}/${reponame}-${_arch}-nodups.whitelist"
				sync_pool "${ARCHMIRROR_fullmodule}/${_arch}/${reponame}/" \
					"${WORKDIR}/${reponame}-${_arch}-nodups.whitelist" \
					"${FTP_BASE}/${ARCHPKGPOOL}/"
				;;
		esac
	done

	case "$UPSTREAM" in
		packages|community)
			# Concatenate all whitelists, check for single *s just in case
			cat "${whitelists[@]}" | grep -v "^\*$" |
				sort -u > "${WORKDIR}/${UPSTREAM}-all.whitelist"
			# FIXME: make_whitelist() wildcards should be narrowed
			#        down to respect the architecture of the tag

			msg "Syncing package pool"
			sync_pool \
				"${ARCHMIRROR_fullmodule}/${ARCHPKGPOOL}/" \
				"${WORKDIR}/${UPSTREAM}-all.whitelist" \
				"${FTP_BASE}/${ARCHPKGPOOL}/"

			msg "Syncing source pool"
			sync_pool \
				"${ARCHMIRROR_fullmodule}/${ARCHSRCPOOL}/" \
				"${WORKDIR}/${UPSTREAM}-all.whitelist" \
				"${FTP_BASE}/${ARCHSRCPOOL}/"
			;;
		archlinux32|archlinuxarm)
			msg "Generating symbolic links to pool"

			local _arch _repo
			for _tag in "${ARCHTAGS[@]}"; do
				_repo=${_tag%-*}
				_arch=${_tag##*-}
				make_repo_symlinks \
					"$ARCHPKGPOOL" \
					"${WORKDIR}/${_repo}-${_arch}.whitelist" \
					"$_repo" \
					"$_arch"
			done
			;;
	esac

	##############################################################
	# 4. Put the packages in the repos                           #
	##############################################################

	msg "Putting databases back in place"

	# FIXME: all repo DBs should be replaced at once (per architecture)
	for _tag in "${ARCHTAGS[@]}"; do
		_repo=${_tag%-*}
		_arch=${_tag##*-}
		make_repo_dbs "$_repo" "$_arch"
	done
}

main "$@"
