#!/usr/bin/perl

use strict;
use warnings;
use utf8;

use LWP::Simple;
use File::Basename;
use POSIX qw(strftime);

# Index page that is fetched when main() is called without a URL.
our $SPECS_URL = 'http://www.xmpp.org/extensions/all.shtml';

# Unconditional exclusions: an entry is skipped when its type OR its status
# matches the corresponding pattern.
our $TYPE_EXCL1   = qr/(Humorous|JIG Formation|Procedural)/io;
our $STATUS_EXCL1 = qr/(Deferred|Obsolete|Rejected|Retracted)/io;

# Combined exclusions: an entry is skipped only when its type AND its status
# both match.
our $TYPE_EXCL2   = qr/(Informational)/io;
our $STATUS_EXCL2 = qr/(Deprecated)/io;

# Fixed entries that are placed in front of whatever is parsed from the page.
our @RFCS = (
	{
		id   => 'RFC 3920',
		name => 'XMPP Core',
		url  => 'http://www.xmpp.org/rfcs/rfc3920.html',
	},
	{
		id   => 'RFC 3921',
		name => 'XMPP Instant Messaging and Presence',
		url  => 'http://www.xmpp.org/rfcs/rfc3921.html',
	},
	{
		id   => 'RFC 3923',
		name => 'XMPP End-to-End Signing and Object Encryption',
		url  => 'http://www.xmpp.org/rfcs/rfc3923.html',
	},
);
# Fetch the specification index page, build one record per table row and
# print every record that passes the exclusion filters to STDOUT.
#
# Arguments:
#   $specs_url - URL of the index page; $SPECS_URL is used when undefined.
#
# Returns 0 on success and 1 when the page cannot be retrieved (a message
# is written to STDERR in that case).
sub main {
	my ($specs_url) = @_;

	$specs_url = $SPECS_URL unless defined $specs_url;

	# Fetch page.
	my $content = get($specs_url);
	unless (defined $content) {
		print STDERR "Can't retrieve URL '$specs_url'\n";
		return (1);
	}

	# The predefined RFC entries come first in the output.
	my @xeps = @RFCS;

	# Start from an empty record so that the `%$xep` checks below never
	# dereference undef (fatal under `use strict`), e.g. when the page is
	# empty or its very first line already opens a table row.
	my $xep = {};

	# Walk through the lines of $content.
	foreach my $line (split /[\r\n]+/, $content) {
		if ($line =~ /^<tr class='tablebody'>/) {
			# XEP entry start: store the previous record if it has data.
			push @xeps, $xep if %$xep;
			$xep = {};
		} elsif (!exists $xep->{'id'} &&
		    $line =~ /^<td valign='top'><a href='([^']+)'>([^<]+)/) {
			# ID and URL come from the first linked cell of the row.
			$xep->{'url'} = $1;
			$xep->{'id'} = $2;
		} elsif ($line =~ /^<td valign='top'>([^<]+)/) {
			# Each plain-text cell fills the first field that is still
			# unset, in this order; further cells are ignored.
			my $text = $1;
			foreach my $field (qw(name type status date)) {
				next if exists $xep->{$field};
				$xep->{$field} = $text;
				last;
			}
		}
	}
	# Store the record that was still being filled at end of input.
	push @xeps, $xep if %$xep;

	# If the XEP list is empty, exit.
	return (0) unless (@xeps);

	# Print header.
	my $script = basename($0);
	# Expands to the revision number once the version-control keyword has
	# been substituted; otherwise falls back to 0.
	my $revision = '$Revision$' =~ /Revision: ([\d]+)/ ? $1 : 0;
	my $now = strftime("%Y-%m-%dT%H:%M:%SZ", gmtime);

	print <<"EOF";
# Generated by $script r$revision on $now
# vim:ft=conf:
EOF

	foreach my $entry (@xeps) {
		# Skip unwanted XEPs.
		my $type = $entry->{'type'} || "";
		my $status = $entry->{'status'} || "";
		next if ($type =~ $TYPE_EXCL1 || $status =~ $STATUS_EXCL1);
		next if ($type =~ $TYPE_EXCL2 && $status =~ $STATUS_EXCL2);

		# Print XEP entry.
		my $id = $entry->{'id'};
		my $url = $entry->{'url'};
		my $name = $entry->{'name'};
		my $date = $entry->{'date'} || "";

		print <<"EOF";

# --------------------------------------------------------------------
XEP:		$id
NAME:		$name
URL:		$url
EOF
		# Optional fields are printed only when they have a value.
		print "STATUS:		$status\n" if $status;
		print "TYPE:		$type\n" if $type;
		print "DATE:		$date\n" if $date;
	}

	return (0);
}

# Run the script and use main()'s return value as the process exit status,
# so that a failed download is visible to the caller as a non-zero status.
exit main(@ARGV);
