#!/usr/bin/perl

# Output modes selected by the command word: one committer-list column,
# one count column per committer, or a raw dump of the collected table.
($LIST, $COUNT, $DBDUMP) = (0, 1, 2);

# Print the command-line synopsis on STDERR and terminate the script with
# exit code -1.  Never returns.
sub usage {
	print STDERR
	    "usage: cat2csv.pl command infile outfile \n",
	    "    [-b branch] [-l branchlistfile] [-s skipsample]\n";
	exit(-1);
}

# The command word, input file and output file are mandatory; whatever is
# left in @ARGV afterwards is treated as option flags.
usage() if (@ARGV < 3);

($command, $infile, $outfile) = splice(@ARGV, 0, 3);

# Parse the optional trailing flags.  Every flag consumes the argument that
# follows it: -b and -l may be given at most once, -s may be repeated and
# each value becomes a key of %skipsamplelist.  An unknown flag, a flag
# without a value, or a repeated -b/-l prints the usage text and exits.
my $pos = 0;
while ($pos <= $#ARGV) {
	my $flag = $ARGV[$pos];
	if ($flag ne "-b" && $flag ne "-l" && $flag ne "-s") {
		usage();
	}
	# All recognised flags need an operand after them.
	usage() if ($pos == $#ARGV);
	my $operand = $ARGV[$pos + 1];

	if ($flag eq "-b") {
		usage() if (defined($branchwanted));
		$branchwanted = $operand;
	} elsif ($flag eq "-l") {
		usage() if (defined($branchlistfile));
		$branchlistfile = $operand;
	} else {
		$skipsamplelist{$operand} = 1;
	}
	$pos += 2;
}

# Translate the command word into an output mode.  "count" and "countavg"
# share $COUNT mode; $countavg tells the row writer whether to print raw
# monthly counts (0) or the average over the neighbouring months (1).
# Any other command word prints the usage text and exits.
if ($command eq "list") {
	$mode = $LIST;
} elsif ($command eq "count") {
	$mode = $COUNT;
	$countavg = 0;
} elsif ($command eq "countavg") {
	# Must set $mode here as well: an undefined $mode compares equal to
	# $LIST (0) and would silently produce list output.
	$mode = $COUNT;
	$countavg = 1;
} elsif ($command eq "dbdump") {
	$mode = $DBDUMP;
} else {
	usage();
}

# Open the commit log for reading and the result file for writing.  The
# two-argument form of open is kept so that existing invocations relying on
# its filename conventions (such as "-") keep working.  On failure report
# which file, in which direction, and the system error.
open(INFILE, "<".$infile) || die "cannot open $infile for reading: $!";
open(OUTFILE, ">".$outfile) || die "cannot open $outfile for writing: $!";

#
# Gather information per-branch (optional).  Several special-purpose branch
# names are used:
# any             (match any branch)
# __nobranch      (reserved; placeholder until a commit's branch is known)
# HEAD            (no branch tag found in commit)
# __misc          (isn't HEAD or RELENG_*)
#
# With no -b option the wanted branch is the empty string, which is later
# treated the same as "any".
if (!defined($branchwanted)) {
	$branchwanted = "";
}

# The -l option names a file that receives the list of branches seen;
# open it for writing up front and report the system error on failure.
if (defined($branchlistfile)) {
	open(BRANCHLISTFILE, ">".$branchlistfile) ||
	    die "cannot open $branchlistfile for writing: $!";
}

# Build a nested hash data structure with counts of commits by each
# committer for each month:
#   %branch_any{month}{committer}         commits on any branch
#   %perbranch{branch}{month}{committer}  commits per branch
#   %branchlist{branch}                   number of times a branch was seen
#
# A line starting with a lower-case letter begins a new commit and carries
# "committer date time tz"; the other lines belong to the current commit
# and are only scanned for a branch tag.

# Account for the commit collected so far, if there is one.  A commit
# without a branch tag counts as HEAD.  When -b __misc was requested,
# commits on branches other than HEAD and RELENG_* are filed under "__misc"
# instead of under their own branch name.
my $record_commit = sub {
	return if (!defined($committer) || !defined($summarydate));

	if (!defined($branch) || $branch eq "__nobranch") {
		$branch = "HEAD";
		$branchlist{$branch}++;
	}
	$branch_any{$summarydate}{$committer}++;

	if ($branchwanted eq "__misc" &&
	    !($branch=~/^HEAD$/) && !($branch=~/^RELENG_/)) {
		$perbranch{"__misc"}{$summarydate}{$committer}++;
	} else {
		$perbranch{$branch}{$summarydate}{$committer}++;
	}
};

$branch = "__nobranch";
while (<INFILE>) {
	# chomp, not chop: a final line without a newline must keep its
	# last character.
	chomp($_);
	if (/^[a-z]/) {
		#
		# New commit: complete processing on previous commit if
		# any.
		$record_commit->();
		$branch = "__nobranch";

		($committer, $date, $time, $tz) = split(/\s+/);
		($year, $month, $day) = split(/[-\/]/, $date);
		# Two-digit years are taken to be 19xx.
		if ($year < 100) {
			$year += 1900;
		}
		# Month keys are sorted as strings later on, so a
		# single-digit month is zero-padded; anything else is used
		# as found.
		$summarydate = "$year/" .
		    (($month =~ /^[0-9]$/) ? "0$month" : $month);
	} else {
		# Existing commit: extract any other useful information
		# that we can.
		if (/^  Branch:.* ([^ ]+)$/) {
			$branch = $1;
			$branchlist{$branch}++;
		} elsif (/^  Modified files: .*Branch: ([^ ]+)\)$/) {
			$branch = $1;
			$branchlist{$branch}++;
		}
	}
}

# The last commit in the file has no following header line to trigger its
# accounting, so record it here.
$record_commit->();

# When -l was given, write one "branch count" line per branch seen, in
# sorted branch-name order, then close the branch list file.
if (defined($branchlistfile)) {
	my @lines = map { "$_ $branchlist{$_}\n" } sort keys %branchlist;
	print BRANCHLISTFILE @lines;
	close(BRANCHLISTFILE);
}

if ($mode == $DBDUMP) {
	#
	# Don't actually generate csv files, rather, dump our associative
	# database of branch and commit information, one
	# "branch:month:committer:count" line per entry.  Months named
	# with -s are left out.  The script ends here in this mode.
	#
	foreach $branch (keys %perbranch) {
		foreach $date (keys %{$perbranch{$branch}}) {
			next if (defined($skipsamplelist{$date}));

			foreach $committer (keys
			    %{$perbranch{$branch}{$date}}) {
				# Index by the month being dumped ($date),
				# not by whatever month the parser saw last.
				print OUTFILE
				    "$branch:$date:$committer:".
				    $perbranch{$branch}{$date}{$committer}.
				    "\n";
			}
		}
	}
	close(OUTFILE);
	exit 0;
}

# Choose the month -> committer -> count table to report on: the
# all-branches table when no branch (or "any") was requested, otherwise the
# table of the requested branch.  An unknown branch is reported on STDERR
# and leaves %data empty; "__misc" with no matching commits is not an error
# and also leaves %data empty.
if (!defined($branchwanted) || $branchwanted eq "" || $branchwanted eq "any") {
	%data = %branch_any;
} elsif (exists($perbranch{$branchwanted})) {
	# exists() rather than defined(%hash): the latter is a fatal error
	# in current versions of Perl.
	%data = %{$perbranch{$branchwanted}};
} elsif ($branchwanted ne "__misc") {
	print STDERR "Branch $branchwanted not defined\n";
	#exit -1;
}

# Collect every committer that appears in at least one month not excluded
# with -s, and keep the names in sorted order for the column layout.
foreach my $sample (keys %data) {
	next if (defined($skipsamplelist{$sample}));
	$committerhash{$_} = 1 foreach (keys %{$data{$sample}});
}
@committerlist = sort keys %committerhash;

# CSV header line: three fixed columns, followed by a single
# "committerlist" column in list mode or one column per committer in count
# mode.
print OUTFILE "month:commits:committers:";

if ($mode == $LIST) {
	print OUTFILE "committerlist\n";
} elsif ($mode == $COUNT) {
	print OUTFILE join(":", @committerlist), "\n";
	# $first is a shared "nothing printed yet" flag that later code
	# reads; leave it as a name-by-name loop would have.
	$first = @committerlist ? 0 : 1;
}

@monthlist = sort keys %data;

# Emit one row per month that was not excluded with -s:
#   month:total commits:number of committers:<mode-specific tail>
# List mode appends the month's committers separated by commas.  Count mode
# appends one value per entry of @committerlist separated by colons: the
# committer's count for the month, or with countavg the mean of the counts
# found for the previous, current and next entry of @monthlist (entries
# with no count for that committer are left out of the mean).
for ($index = 0; $index <= $#monthlist; $index++) {
	$month = $monthlist[$index];
	next if (defined($skipsamplelist{$month}));

	my @active = keys %{$data{$month}};
	my $commits = 0;
	$commits += $data{$month}{$_} foreach (@active);
	print OUTFILE join(":", $month, $commits, scalar(@active)), ":";

	if ($mode == $LIST) {
		print OUTFILE join(",", @active);
		# Shared flag: 1 means nothing was printed for this row.
		$first = @active ? 0 : 1;
	} elsif ($mode == $COUNT) {
		# Neighbouring months for the averaging window; the empty
		# string stands in for a neighbour beyond either end.
		my $before = ($index == 0) ? "" : $monthlist[$index - 1];
		my $after = ($index == $#monthlist) ?
		    "" : $monthlist[$index + 1];

		my @cells;
		foreach my $who (@committerlist) {
			my $cell;
			if ($countavg == 1) {
				my ($total, $seen) = (0, 0);
				foreach my $sample ($before, $month, $after) {
					my $n = $data{$sample}{$who};
					next if (!defined($n));
					$total += $n;
					$seen++;
				}
				$cell = $seen ? $total / $seen : 0;
			} else {
				$cell = defined($data{$month}{$who}) ?
				    $data{$month}{$who} : 0;
			}
			push(@cells, $cell);
		}
		print OUTFILE join(":", @cells);
		# Shared flag: 1 means nothing was printed for this row.
		$first = @cells ? 0 : 1;
	}
	print OUTFILE "\n";
}

#
# gnuplot gets unhappy if there are no data points, so inject a faux data
# point if there were otherwise none.
#
# A data row is written for each month that is not excluded with -s, so
# "no data points" means no such month exists: either there are no months at
# all or every one of them was skipped.  The faux row carries zeros in the
# three fixed columns; in count mode it also gets one zero per committer
# column of the header, and in list mode the committer list is left empty.
#
my $rowswritten = grep { !defined($skipsamplelist{$_}) } @monthlist;
if ($rowswritten == 0) {
	print OUTFILE "0:0:0:";
	if ($mode == $COUNT) {
		# No difference between countavg and regular if there
		# are no samples.
		print OUTFILE join(":", (0) x scalar(@committerlist));
	}
	print OUTFILE "\n";
}

close(INFILE);
# Buffered write errors (for example a full disk) only show up when the
# handle is closed, so do not ignore the result for the output file.
close(OUTFILE) || die "error writing $outfile: $!";
