#!/usr/bin/perl

# git-chart: plot a distribution of commits over time
#
# Copyright (C) 2011-2017 Giuseppe Bilotta
#
# Licensed under the Mozilla Public License, version 2.0
# Refer to the attached LICENSE file for details.

# List of general TODO
# * default to something else than "the whole history", probably
# * if no commits are specified and a commit grouping step is specified,
#   limit commits proportionally (e.g. --daily => one week worth of commits)
# * allow the steps to be counted from the most recent rather than from the
#   oldest commit

use strict;
use warnings;

use POSIX qw(ceil mktime strftime);
use Getopt::Long;
use Date::Parse;
use File::Spec;
use File::Temp;
use List::Util 'max';

our $VERSION = '17.04';

my $SECS_PER_DAY = 24*3600;

# Nominal length, in seconds, of each named grouping step. Months and years
# are approximations (30 and 360 days); gather_data() compensates with a
# tolerance when it looks for missing steps.
my %steps = (
    hourly  => 3600,
    daily   => $SECS_PER_DAY,
    weekly  => 7*$SECS_PER_DAY,
    monthly => 30*$SECS_PER_DAY,
    yearly  => 12*30*$SECS_PER_DAY,
);

# strftime() formats that truncate a timestamp to the start of its step.
# 'weekly' has no entry on purpose: it is handled separately in
# _bucket_start().
my %trunc = (
    hourly  => "%Y-%m-%d %H:00",
    daily   => "%Y-%m-%d 00:00",
    monthly => "%Y-%m-01 00:00",
    yearly  => "%Y-01-01 00:00",
);

# Print the command-line help to standard output.
#
# NOTE(review): the heredoc in the file this was recovered from had lost its
# opener, its first lines and every <placeholder>. The synopsis line, the
# first sentence and the placeholder names below are a reconstruction --
# confirm the wording. The chart-height default now states 144, which is the
# value actually set in %options (the damaged text said 100).
sub usage {
    # TODO
    print <<EOU;
Usage: git chart [options] [log options]

Plot the distribution over time of the commits selected by <log options>. By default, commits are grouped by hour, day, week, month or year depending on the time spanned, but this can be controlled by the user.

Options:
    --hourly, --daily, --weekly, --monthly, --yearly:
        force a specific commit grouping
    --step=<seconds>:
        force commits to be grouped each <seconds> seconds
    --gnuplot:
        produce a chart with gnuplot (default)
    --google:
        produce a chart with Google Charts
    --chart-height=<height>:
        set the Google Charts height; the width is set to a 4:3 ratio
        (default: 144)
EOU
    return;
}

# Return the epoch time (local timezone) at which the bucket containing
# $date starts. $step is either one of the keys of %steps or a positive
# number of seconds.
sub _bucket_start {
    my ($date, $step) = @_;

    if (exists $trunc{$step}) {
        # format the date truncated to the step, then parse it back
        return str2time(strftime($trunc{$step}, localtime($date)));
    }

    if ($step eq 'weekly') {
        # Go back to the previous Sunday. Let mktime() normalise the
        # (possibly zero or negative) day of month instead of subtracting
        # wday*86400 seconds: the latter is off by an hour whenever a DST
        # change falls inside the week, which produced two distinct keys
        # for the same week.
        # TODO config week start
        my ($mday, $mon, $year, $wday) = (localtime($date))[3 .. 6];
        return mktime(0, 0, 0, $mday - $wday, $mon, $year, 0, 0, -1);
    }

    return $date - ($date % $step);
}

# Insert an empty bucket ({_commits => 0}) for every step that has no
# commits between the first and the last populated bucket of %$dataset, and
# return the highest per-bucket commit count.
# $step is the numeric step in seconds; $tolerance is how far a populated
# bucket may be from the expected position and still count as "the next one".
sub _fill_missing_steps {
    my ($dataset, $step, $tolerance) = @_;

    # bucket keys are epoch seconds: drop the _authors entry by name and
    # sort numerically (a string sort misorders keys of different length)
    my @keys = sort { $a <=> $b } grep { $_ ne '_authors' } keys %$dataset;

    my $max = 0;
    my $key = shift @keys;
    while (defined $key) {
        $max = max($max, $dataset->{$key}{_commits});

        my $next = shift @keys;
        last unless defined $next;

        # Only fill while the expected step is still *before* the next
        # populated bucket. Testing the absolute distance instead never
        # terminates once the expected step has overshot $next.
        my $next_step = $key + $step;
        while ($next - $next_step > $tolerance) {
            $dataset->{$next_step} = { _commits => 0 };
            $next_step += $step;
        }
        $key = $next;
    }

    return $max;
}

# Run `git log` with the extra arguments in $options->{cmdline} and group
# the resulting commits by time step.
#
# $options is a hash ref. On input, {cmdline} is an array ref of git-log
# arguments and {step}, if defined, is either a key of %steps or a positive
# number of seconds. On output, {step} holds the step actually used (it is
# chosen from the time spanned when not given) and {max} the highest number
# of commits found in a single bucket.
#
# Returns a hash ref mapping each bucket start (epoch seconds) to
# { _commits => total, <author name> => count, ... }, plus the special key
# '_authors' mapping each author name to their overall commit count.
#
# Dies if git cannot be started, if the step is invalid, or if no commits
# were found.
sub gather_data {
    my $options = shift;

    print "Gathering data ...\n";

    my @commits;
    # check open()'s return value: the lexical handle is defined even when
    # the open itself fails
    open(my $fd, '-|', qw(git log --date=local),
        '--pretty=%aN%x00%ad%x00%s', @{ $options->{cmdline} })
        or die "failed to get revs: $!";
    while (my $line = <$fd>) {
        chomp $line;
        my ($author, $date) = split /\0/, $line, 3;
        my $when = defined $date ? str2time($date) : undef;
        # skip anything whose date cannot be parsed rather than carrying
        # an undefined timestamp through the arithmetic below
        next unless defined $when;
        push @commits, { date => $when, author => $author };
    }
    close $fd
        or warn "git log did not complete successfully\n";

    # dates are epoch seconds: compare them as numbers
    @commits = sort { $a->{date} <=> $b->{date} } @commits;
    print "...done, " . scalar(@commits) . " commits.\n";
    die "No commits!" if (@commits < 1);

    my $gap = $commits[-1]{date} - $commits[0]{date};

    my $step = $options->{step};
    if (!defined $step) {
        $step =
            $gap > $steps{yearly}   ? 'monthly' :
            $gap > $steps{monthly}  ? 'weekly'  :
            $gap > $steps{weekly}/2 ? 'daily'   :
                                      'hourly'  ;
        $options->{step} = $step;
    }
    # a zero or non-numeric custom step would otherwise end up as the
    # right-hand side of a modulus
    die "invalid step $step"
        unless exists $steps{$step} or ($step =~ /\A\d+\z/ and $step > 0);

    # truncate each commit (date) to the wanted step
    # e.g. if the step is 'monthly', then commit YYYY/MM/DD
    # maps to YYYY/MM
    my %dataset = (_authors => {});
    for my $commit (@commits) {
        my $author = $commit->{author};
        my $key = _bucket_start($commit->{date}, $step);

        # make sure the value is a hash
        $dataset{$key} = { _commits => 0 } unless exists $dataset{$key};

        $dataset{$key}{_commits} += 1;
        $dataset{$key}{$author} += 1;
        $dataset{_authors}{$author} += 1;
    }

    # ensure numeric step, taking into account
    # that our numerical steps are approximate
    my $tolerance = 0;
    if (exists $steps{$step}) {
        $step = $steps{$step};
        $tolerance = $step/2;
    }

    # fill missing steps and find max
    $options->{max} = _fill_missing_steps(\%dataset, $step, $tolerance);

    return \%dataset;
}
# functions to plot the datasets.
# each function can be called with either one or two parameters.
# when called with two parameters, the first is assumed to be the dataset,
# and the second the options (both hash refs).
# when called with a single parameter, it is assumed to be an options hash
# ref, and the dataset is created by calling gather_data with the passed
# options.

# google chart API
# TODO needs a lot of customization
#
# Print a Google Charts URL for a bar chart of the commits per step.
# Reads {chart_height} and {max} from the options; the width is derived from
# the height with a 4:3 ratio.
sub google_chart {
    my ($dataset, $options) = @_;
    if (!defined $options) {
        $options = $dataset;
        $dataset = gather_data($options);
    }

    # bucket keys are epoch seconds: drop the _authors entry by name and
    # sort numerically (a string sort misorders keys of different length)
    my @keys = sort { $a <=> $b } grep { $_ ne '_authors' } keys %$dataset;
    my @data = map { $dataset->{$_}{_commits} } @keys;

    my $height = $options->{chart_height};
    my $width = ceil(4*$height/3);
    # {max} is set by gather_data(); compute it when a dataset is passed in
    # with options that lack it
    my $max = defined $options->{max} ? $options->{max} : max(0, @data);

    my $launch = sprintf
        'https://chart.googleapis.com/chart?chs=%sx%s&cht=bvg&chd=t:%s&chds=0,%s&chbh=a&chxt=y&chxr=0,0,%s',
        $width, $height, join(",", @data), $max, $max;

    print $launch, "\n";
    # `git web--browse "$launch"`
    return;
}

# gnuplot
#
# Write the dataset to a temporary tab-separated file (one row per step, one
# column for the total plus one per author) and pipe a plotting script to
# `gnuplot -persist`.
# Reads {step}, {max} and the gnuplot_* entries from the options.
# Dies if gnuplot cannot be started.
sub gnuplot_chart {
    my ($dataset, $options) = @_;
    if (!defined $options) {
        $options = $dataset;
        $dataset = gather_data($options);
    }

    # authors, most prolific first (name as tie-breaker for a stable order);
    # the dataset is only read, never modified
    my $authors = $dataset->{_authors};
    my @authors = sort {
        $authors->{$b} <=> $authors->{$a} or $a cmp $b
    } keys %$authors;

    my @keys = sort { $a <=> $b } grep { $_ ne '_authors' } keys %$dataset;

    my $step = $options->{step};

    my $from = $keys[0];
    my $to = $keys[-1];

    my $data = join("\t", 'date', 'commits', map { "\"$_\"" } @authors) . "\n";
    for my $key (@keys) {
        my $row = $dataset->{$key};
        $data .= join("\t", $key, $row->{_commits},
            map { exists $row->{$_} ? $row->{$_} : 0 } @authors) . "\n";
    }

    my $max = $options->{max} + 1;

    # add a fake datapoint to prevent the last
    # datum from becoming invisible with style data steps
    $to += exists $steps{$step} ? $steps{$step} : $step;
    $data .= join("\t", $to, 0, (0) x @authors) . "\n";

    my ($formatx, $ticks);
    if ($to - $from > $steps{yearly}) {
        $formatx = "%Y";
        $ticks = $steps{yearly};
    }
    elsif ($to - $from > $steps{monthly}) {
        $formatx = "%b\\n%Y";
        $ticks = $steps{monthly};
    }
    elsif ($to - $from > $steps{weekly}/2) {
        $formatx = "%d\\n%b";
        $ticks = $steps{daily};
    }
    else {
        $formatx = "%H\\n%d";
        $ticks = $steps{hourly};
    }

    my $nticks = ceil(($to - $from)/$ticks);

    # Default terminal sizing. Rationale:
    # * authors are limited to 8 rows, so the resulting number of columns
    #   should fit within some scaled 800 pixels,
    #   assuming that each column takes about 600 scaled pixels
    my $sizemul = max(1, ceil(ceil(3*@authors/8)/4));
    print "# $sizemul \n";
    $sizemul = max($sizemul, $nticks/8);
    print "# $sizemul \n";

    # $sizemul may be fractional; a terminal size is a whole number of pixels
    my $xres = ceil(800*$sizemul);
    my $yres = ceil(450*$sizemul);

    # TODO allow customization
    # in particular, detect (lack of) display and set term to dumb accordingly
    my $termcmd = $options->{gnuplot_term} ||
        "set terminal GNUTERM enhanced size $xres,$yres\n";

    # The parentheses matter: without them the whole `a || b` was taken as
    # the condition of the ternary, so a user-supplied setup was never used.
    my $plotsetup = $options->{gnuplot_setup} ||
        (@authors == 1 ?
            'set nokey' :
            'set key above autotitle columnheader maxrows 8');
    $plotsetup .= "\nset yrange [0:$max]\n";
    $plotsetup .= "set xrange ['$from':'$to']\n";
    $plotsetup .= "set format x \"$formatx\"\n";
    $plotsetup .= "set xtics axis out nomirror $ticks\n";
    $plotsetup .= "set ytics axis out nomirror\n";

    my $plotstyle = $options->{gnuplot_style};
    my $plotoptions = $options->{gnuplot_plotwith};

    my $datafile = File::Temp->new(
        TEMPLATE => 'git-chart-XXXXXX',
        DIR => File::Spec->tmpdir()
    );
    print $datafile $data;
    # gnuplot reads the file by name: make sure the data is actually in it
    $datafile->flush;

    # list form: no shell involved, and failure to start gnuplot is reported
    open(my $gp, '|-', 'gnuplot', '-persist')
        or die "failed to run gnuplot: $!";

    my $plotcmd = "plot '$datafile' using 1:2";
    if (@authors > 1) {
        my $lastcolumn = $#authors + 3;
        $plotcmd .= " notitle";
        $plotcmd .= ", for [a=3:$lastcolumn] '' using 1:a";
    }

    # NOTE(review): the body of this heredoc, the statements sending it to
    # gnuplot and the end of this sub were missing from the file this was
    # recovered from. What follows is a reconstruction from the variables
    # prepared above; the time settings are inferred from the quoted epoch
    # values used in the xrange. Confirm against a pristine copy.
    my $gp_script = <<"GPSCRIPT";
$termcmd
set xdata time
set timefmt "%s"
$plotsetup
$plotstyle
$plotcmd $plotoptions
GPSCRIPT

    print $gp $gp_script;
    # closing the pipe waits for gnuplot, so the temporary data file (removed
    # when $datafile goes out of scope) is still there while it is being read
    close $gp
        or warn "gnuplot did not complete successfully\n";
    return;
}

# NOTE(review): the opening of this hash was missing as well; 'cmdline' is
# the key that the rest of the script reads and assigns.
my %options = (
    # git log options
    cmdline => [],
    # charting/plotting options
    plotter => \&gnuplot_chart,
    chart_height => 144,
    gnuplot_term => '',
    gnuplot_setup => '',
    gnuplot_style => 'set style data steps',
    gnuplot_plotwith => '',
);

# Getopt::Long handler for the grouping options: called with the option name
# and its value. --hourly, --daily, ... select the step named like the option;
# --step=<value> takes either a positive number of seconds or a step name.
# Dies on an unknown or zero step.
sub parse_step {
    my ($key, $step) = @_;

    if (exists $steps{$key}) {
        $options{step} = $key;
        return;
    }

    die "this can't happen ($key)" unless $key eq 'step';

    if ($step =~ /\A\d+\z/) {
        # a zero step would later be used as the right-hand side of a modulus
        die "unknown step $step" if $step == 0;
        $options{step} = 0 + $step;
        return;
    }
    if (exists $steps{$step}) {
        $options{step} = $step;
        return;
    }
    die "unknown step $step";
}

my $help = 0;

# read our options first; anything we do not know about is left in @ARGV
# and passed on to git log
Getopt::Long::Configure('pass_through');
my $parsed = GetOptions(
    'hourly' => \&parse_step,
    'daily' => \&parse_step,
    'weekly' => \&parse_step,
    'monthly' => \&parse_step,
    'yearly' => \&parse_step,
    'step=s' => \&parse_step,
    'chart-height=i' => sub { $options{chart_height} = $_[1] },
    google => sub { $options{plotter} = \&google_chart },
    gnuplot => sub { $options{plotter} = \&gnuplot_chart },
    'help|?' => \$help,
);

# a malformed value for one of our own options (e.g. --chart-height=abc)
if (!$parsed) {
    usage();
    exit 1;
}

if ($help) {
    usage();
    exit;
}

# if anything was left, check for log options
if (@ARGV) {
    $options{cmdline} = \@ARGV;
}

$options{plotter}->(\%options);