#!/usr/bin/perl -w
# Copyright (C) all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict;
use v5.10.1;
use Getopt::Long qw/:config gnu_getopt no_ignore_case auto_abbrev/;
use autodie qw(open chmod close rename);
use Fcntl qw(:DEFAULT);
# Usage text: written to stdout for --help and to stderr (followed by a
# non-zero exit) when the command line cannot be parsed.
my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term:
usage: public-inbox-init NAME INBOX_DIR HTTP_URL ADDRESS [ADDRESS..]

  Initialize a public-inbox

required arguments:

  NAME                the name of the inbox
  INBOX_DIR           pathname of the inbox
  HTTP_URL            HTTP (or HTTPS) URL
  ADDRESS             email address(es), may be specified multiple times

options:

  -V2                 use scalable public-inbox-v2-format(5)
  -L LEVEL            index level `basic', `medium', or `full' (default: full)
  --ng NEWSGROUP      set NNTP newsgroup name
  -c KEY=VALUE        set additional config option(s)
  --skip-artnum=NUM   NNTP article numbers to skip
  --skip-epoch=NUM    epochs to skip (-V2 only)
  -j JOBS             number of indexing jobs (-V2 only), (default: 4)

See public-inbox-init(1) man page for full documentation.
EOF

# Admin helpers are needed from here on (dependency check now, -C handling
# once the command line has been parsed).
require PublicInbox::Admin;
PublicInbox::Admin::require_or_die('-base');

# Destinations for the command-line switches described in $help
my ($version, $indexlevel);
my ($skip_epoch, $skip_artnum, $skip_docdata);
my ($jobs, $show_help);
my $ng = '';
my (@c_extra, @chdir);
my %opts = (
	'help|h' => \$show_help,
	'V|version=i' => \$version,
	'L|index-level|indexlevel=s' => \$indexlevel,
	'j|jobs=i' => \$jobs,
	'ng|newsgroup=s' => \$ng,
	'S|skip|skip-epoch=i' => \$skip_epoch,
	'skip-artnum=i' => \$skip_artnum,
	'skip-docdata' => \$skip_docdata,
	'c=s@' => \@c_extra,
	'C=s@' => \@chdir,
);

# Any kind of malformed invocation: usage on stderr, exit status 1
my $usage_cb = sub { print STDERR $help; exit(1) };

$usage_cb->() unless GetOptions(%opts);
if ($show_help) {
	print $help;
	exit(0);
}

# NAME, INBOX_DIR and HTTP_URL are mandatory and must be true values;
# whatever remains in @ARGV is the (non-empty) list of addresses.
my ($name, $inboxdir, $http_url) = splice(@ARGV, 0, 3);
$name or $usage_cb->();
$inboxdir or $usage_cb->();
$http_url or $usage_cb->();
my @address = @ARGV;
$usage_cb->() unless @address;
PublicInbox::Admin::do_chdir(\@chdir);

# Check every -c KEY=VALUE pair.  Keys which have a dedicated switch or
# special meaning are folded into the matching variable and dropped from
# @c_extra; all other pairs are kept untouched for git-config(1) later on.
@c_extra = grep {
	my ($k, $v) = split(/=/, $_, 2);
	die "Usage: -c KEY=VALUE\n" unless defined($v);
	die "$k contains invalid characters\n" unless $k =~ /\A[a-z]+\z/i;
	$k = lc($k);
	my $keep = 0;
	if ($k =~ /\A(?:inboxdir|mainrepo)\z/) {
		# the inbox location only comes from the INBOX_DIR argument
		die "$k not allowed via -c $_\n"
	} elsif ($k eq 'address') {
		push @address, $v; # for conflict checking
	} elsif ($k eq 'newsgroup') {
		$ng eq '' or die "newsgroup already set ($ng)\n";
		$ng = $v;
	} elsif ($k eq 'indexlevel') {
		die "indexlevel already set ($indexlevel)\n"
			if defined($indexlevel);
		$indexlevel = $v;
	} else {
		$keep = 1;
	}
	$keep;
} @c_extra;

if (defined $indexlevel) {
	PublicInbox::Admin::indexlevel_ok_or_die($indexlevel);
}

# The newsgroup name (from --ng or -c newsgroup=) is restricted to a
# conservative character set and may not begin or end with a dot.
if ($ng =~ m![^A-Za-z0-9/_\.\-\~\@\+\=:]!) {
	die "--newsgroup `$ng' is not valid\n";
}
if ($ng =~ m!\A\.! || $ng =~ m!\.\z!) {
	die "--newsgroup `$ng' must not start or end with `.'\n";
}

# Locate the config file to edit and make sure its directory exists.
require PublicInbox::Config;
my $pi_config = PublicInbox::Config->default_file;
# $dir is everything up to and including the last `/' of the config path;
# it stays undefined when the path contains no `/' at all.
my ($dir) = ($pi_config =~ m!(.*?/)[^/]+\z!);
require File::Path;
File::Path::mkpath($dir); # will croak on fatal errors

# first, we grab a flock to prevent simultaneous public-inbox-init
# processes from trampling over each other, or exiting with 255 on
# O_EXCL failure below.  This gets unlocked automatically on exit:
require PublicInbox::Lock;
my $lock_obj = { lock_path => "$pi_config.flock" };
PublicInbox::Lock::lock_acquire($lock_obj);

# git-config will operate on this (and rename on success):
# The temporary file is created in the same directory as the config file.
require File::Temp;
my $fh = File::Temp->new(TEMPLATE => 'pi-init-XXXX', DIR => $dir);

# Now, we grab another lock to use git-config(1) locking, so it won't
# wait on the lock, unlike some of our internal flock()-based locks.
# This is to prevent direct git-config(1) usage from clobbering our
# changes.
my $lockfile = "$pi_config.lock";
my $lockfh;
# O_EXCL makes this fail if "$pi_config.lock" already exists; in that case
# we warn and exit with status 255 instead of waiting.
sysopen($lockfh, $lockfile, O_RDWR|O_CREAT|O_EXCL) or do {
	warn "could not open config file: $lockfile: $!\n";
	exit(255);
};
# Arrange for the lock file to be unlinked via the callback below; the
# script later undefines $auto_unlink to trigger that on success.
# NOTE(review): presumably the callback also runs when the guard object is
# destroyed at exit -- confirm against PublicInbox::OnDestroy.
require PublicInbox::OnDestroy;
my $auto_unlink = PublicInbox::OnDestroy::on_destroy(sub { unlink $lockfile });
# Mode for a brand-new config file; replaced below by the mode of an
# existing config file so rewriting it keeps its permissions.
my $perm = 0644 & ~umask;
# Lowercased addresses which the existing config already maps to $name;
# these do not need to be added again.
my %seen;
if (-e $pi_config) {
	# Seed the temporary file with the current config contents and
	# permission bits before git-config(1) edits it.
	require PublicInbox::IO;
	open(my $oh, '<', $pi_config);
	my @st = stat($oh) or die "(f)stat failed on $pi_config: $!\n";
	$perm = $st[2];
	chmod($perm & 07777, $fh);
	print $fh PublicInbox::IO::read_all($oh);
	close $oh;

	# yes, this conflict checking is racy if multiple instances of this
	# script are run by the same $PI_DIR
	my $cfg = PublicInbox::Config->new;
	my $conflict;
	foreach my $addr (@address) {
		my $found = $cfg->lookup($addr);
		if ($found) {
			if ($found->{name} ne $name) {
				# report every conflicting address before
				# giving up, not just the first one
				print STDERR
					"`$addr' already defined for ",
					"`$found->{name}',\n",
					"does not match intended `$name'\n";
				$conflict = 1;
			} else {
				$seen{lc($addr)} = 1;
			}
		}
	}

	exit(1) if $conflict;

	# Re-running on an existing inbox keeps its configured index level
	# unless one was explicitly requested this time.
	my $ibx = $cfg->lookup_name($name);
	$indexlevel //= $ibx->{indexlevel} if $ibx;
}
# Only git-config(1) touches the temporary config from here on, so keep
# its path and drop our own handle (close is fatal on error via autodie).
my $pi_config_tmp = $fh->filename;
close($fh);

my $pfx = "publicinbox.$name";
# common git-config(1) command prefix operating on the temporary file
my @x = ('git', 'config', "--file=$pi_config_tmp");

$inboxdir = PublicInbox::Config::rel2abs_collapsed($inboxdir);
index($inboxdir, "\n") < 0 or die "`\\n' not allowed in `$inboxdir'\n";

# An existing inbox dictates the format version: inbox.lock marks a v2
# inbox, a top-level objects/ directory a v1 inbox.  An explicit -V must
# agree with what is found on disk.
my $has_v2_lock = -f "$inboxdir/inbox.lock";
if ($has_v2_lock) {
	$version //= 2;
	$version == 2 or
		die "$inboxdir is a -V2 inbox, -V$version specified\n"
} elsif (-d "$inboxdir/objects") {
	$version //= 1;
	$version == 1 or
		die "$inboxdir is a -V1 inbox, -V$version specified\n"
}

# nothing on disk and no -V given: fall back to v1
$version //= 1;

die "--skip-epoch is only supported for -V2 inboxes\n"
	if $version == 1 && defined $skip_epoch;

my $ibx = PublicInbox::Inbox->new({
	name => $name,
	inboxdir => $inboxdir,
	version => $version,
	indexlevel => $indexlevel,
	-primary_address => $address[0],
});

# Creation options handed to InboxWritable; only --jobs ends up in here.
my $creat_opt = {};
if (defined $jobs) {
	$version == 1 and
		die "--jobs is only supported for -V2 inboxes\n";
	$jobs > 0 or die "--jobs=$jobs must be >= 1\n";
	$creat_opt->{nproc} = $jobs;
}

require PublicInbox::InboxWritable;
$ibx = PublicInbox::InboxWritable->new($ibx, $creat_opt);
if ($skip_docdata) {
	$ibx->{indexlevel} //= 'full'; # ensure init_inbox writes xdb
	die "--skip-docdata ignored with --indexlevel=basic\n"
		if $ibx->{indexlevel} eq 'basic';
	$ibx->{-skip_docdata} = $skip_docdata;
}
$ibx->init_inbox(0, $skip_epoch, $skip_artnum);

# Write a default description, but only when none exists yet: O_EXCL makes
# sysopen fail on an existing file, and that failure is deliberately ignored.
my $f = "$inboxdir/description";
if (sysopen($fh, $f, O_CREAT|O_EXCL|O_WRONLY)) {
	print {$fh} "public inbox for $address[0]\n";
	close($fh);
}

# Everything below edits the temporary config copy through git-config(1)
# and finally renames it over the real config file.

# needed for git prior to v2.1.0
umask(0077);

require PublicInbox::Spawn;
PublicInbox::Spawn->import(qw(run_die));

# Add every address exactly once.  %seen already holds the (lowercased)
# addresses the existing config maps to this inbox; it is updated while
# iterating so an address repeated within this invocation (on the command
# line and/or via -c address=...) does not end up as a duplicate entry.
foreach my $addr (@address) {
	next if $seen{lc($addr)}++;
	run_die([@x, "--add", "$pfx.address", $addr]);
}
run_die([@x, "$pfx.url", $http_url]);
run_die([@x, "$pfx.inboxdir", $inboxdir]);

if (defined($indexlevel)) {
	run_die([@x, "$pfx.indexlevel", $indexlevel]);
}
run_die([@x, "$pfx.newsgroup", $ng]) if $ng ne '';

# Remaining -c KEY=VALUE pairs: the trailing pattern limits --replace-all
# to entries holding exactly this value, so re-running with the same
# arguments does not pile up duplicates.
for my $kv (@c_extra) {
	my ($k, $v) = split(/=/, $kv, 2);
	# git 2.30+ has --fixed-value for idempotent invocations,
	# but that's too new to depend on in 2021.  Perl quotemeta
	# seems compatible enough for POSIX ERE which git uses
	my $re = '^'.quotemeta($v).'$';
	run_die([@x, qw(--replace-all), "$pfx.$k", $v, $re]);
}

# needed for git prior to v2.1.0
chmod($perm & 07777, $pi_config_tmp);
# move the finished temporary file into place (autodie makes this fatal
# on failure), then drop the guard so the git-style lock file goes away
rename $pi_config_tmp, $pi_config;
undef $auto_unlink; # trigger ->DESTROY
