# Import MMD 2.0b5
# [multimarkdown] / Utilities / table_cleanup.pl
#!/usr/bin/env perl
#
# $Id: table_cleanup.pl 482 2008-01-12 23:07:32Z fletcher $
#
# Cleanup the spacing and alignment of MultiMarkdown tables
#
# Used by my TextMate Bundle, but can be used elsewhere as well
#
# Copyright (c) 2006-2008 Fletcher T. Penney
#       <http://fletcherpenney.net/>
#
# MultiMarkdown Version 2.0.b5
#

# Slurp the whole input (STDIN, or the files named on the command line) in
# one read.  `local $/` is confined to the do-block, so the input record
# separator is restored as soon as the read is finished.
my $text = do { local $/; <> };
$text = "" unless defined $text;        # empty input: nothing to clean up

# Per-table state, reset for every table that is processed:
#   %max_width  - column index => widest formatted cell seen in that column
#   @alignments - column index => "left", "right", "center", "char" or ""
my %max_width = ();
my @alignments = ();


# Reusable regexp's to match table

# A table line may be indented by at most this many spaces.
my $less_than_tab = 3;

# Optional indentation allowed in front of a table line.
my $line_start = qr{
    [ ]{0,$less_than_tab}
}mx;

# Any single line that contains at least one `|`, including its newline.
my $table_row = qr{
    [^\n]*?\|[^\n]*?\n
}mx;

# First header row: must begin (after the allowed indentation) with
# non-whitespace text and contain a `|`.
my $first_row = qr{
    $line_start
    \S+.*?\|.*?\n
}mx;

# One body row, optionally preceded by a newline; used with a `+` quantifier.
my $table_rows = qr{
    (?:\n?$table_row)
}mx;

# A caption line: `[...]` followed only by blanks.
my $table_caption = qr{
    $line_start
    \[.*?\][ \t]*\n
}mx;

# The divider / alignment row, built only from `|`, `-`, `+`, `:`, `.` and
# spaces, and containing at least one `|`.
my $table_divider = qr{
    $line_start
    [\|\-\+\:\.][ \-\+\|\:\.]*?\|[ \-\+\|\:\.]*
}mx;

my $whole_table = qr{
    ($table_caption)?               # Optional caption
    ($first_row                     # First line must start at beginning
    ($table_row)*?)?                # Header Rows
    $table_divider                  # Divider/Alignment definitions
    $table_rows+                    # Body Rows
    \n?[^\n]*?\|[^\n]*?             # Allow last row not to have a "\n" for cleaning while editing
    ($table_caption)?               # Optional caption
}mx;


# Find whole tables, then break them up and process them.
#
# For each match the replacement code:
#   1. strips the captions and splits the table into header, divider row
#      and body;
#   2. reads the column alignments from the divider row into @alignments;
#   3. measures every column into %max_width (single-column cells first,
#      then cells that span several columns);
#   4. rewrites the divider row, and then every row, to those widths.
#
# NOTE(review): this substitution has no /m flag, so `^` anchors only at the
# very start of the input; a table that begins later in the text is left
# untouched.  Presumably the script is only ever fed the table itself -
# confirm before adding /m.

$text =~ s{
    ^($whole_table)         # Whole table in capture group 1
    (\n|\Z)                 # End of file or 2 blank lines
}{
    my $table = $1 . "\n";
    my $table_original = $table;
    @alignments = ();
    %max_width = ();

    # Strip Caption and Summary
    $table =~ s/^$line_start\[\s*(.*?)\s*\](\[\s*(.*?)\s*\])?[ \t]*$//m;
    $table =~ s/\n$line_start\[\s*(.*?)\s*\][ \t]*\n/\n/s;

    $table = "\n" . $table;

    # Cut the divider row and the body rows out of the table; what is left
    # is the header.  Need to be greedy.  The captures are only read when
    # the substitution really matched, so stale values from an earlier
    # match can never leak in.
    my ($alignment_string, $body) = ("", "");
    if ($table =~ s/\n($table_divider)\n($table_rows+)//s) {
        ($alignment_string, $body) = ($1, $2);
    }
    my $header = $table;

    # Process column alignment: a colon on both ends means center, only at
    # the end means right, only at the start means left; a leading or
    # trailing dot means char; anything else is unspecified.
    while ($alignment_string =~ /\|?\s*(.+?)\s*(\||\Z)/gs) {
        my $cell = $1;
        my $colon_first = ($cell =~ /^\:/);
        my $colon_last  = ($cell =~ /\:$/);
        my $alignment   = "";

        if ($colon_first && $colon_last) {
            $alignment = "center";
        } elsif ($colon_last) {
            $alignment = "right";
        } elsif ($colon_first) {
            $alignment = "left";
        } elsif (($cell =~ /^\./) || ($cell =~ /\.$/)) {
            $alignment = "char";
        }
        push(@alignments, $alignment);
    }

    $table = $header . "\n" . $body;

    # Find the widest cell in each column.  Pass 0 measures single-column
    # cells only; pass 1 then handles cells that span multiple columns, so
    # that a spanning cell can be divided over column widths that are
    # already known.
    foreach my $spans_only (0, 1) {
        foreach my $row (split(/\n/, $table)) {
            my $count = 0;
            while ($row =~ /(\|?\s*[^\|]+?\s*(\|+|\Z))/gs) {
                my $cell = $1;          # Whole cell, including its pipes
                my $ending = $2;        # Trailing pipes, if any
                my $is_span = ($ending =~ /\|\|/) ? 1 : 0;

                setWidth($count, $cell) if ($is_span == $spans_only);

                # A spanning cell occupies one column per trailing pipe
                $count += $is_span ? length($ending) : 1;
            }
        }
    }

    # Fix length of alignment definitions

    $table_original =~ s{
        \n($table_divider)\n
    }{
        my $divider = $1;
        my $count = 0;
        $divider =~ s{
            (\|?)\s*([^\|]+?)\s*(\|+|\Z)
        }{
            my $opening = $1;
            my $cell = $2;
            my $ending = $3;
            my $result = "";

            # Number of dashes: the column width less the room kept for
            # padding and the pipe.
            my $goal_length = ($max_width{$count} || 0) - 3;
            if ($count == 0) {
                if ($opening eq "") {
                    $goal_length++;
                } else {
                    $goal_length--;
                }
            }

            # Each alignment colon takes the place of one dash
            my $colon_first = ($cell =~ /^\:/);
            my $colon_last  = ($cell =~ /\:$/);
            $goal_length-- if $colon_first;
            $goal_length-- if $colon_last;

            $result .= ":" if $colon_first;
            $result .= "-" x $goal_length if ($goal_length > 0);
            $result .= ":" if $colon_last;

            $count++;
            $opening . "$result" . $ending;
        }xsge;
        "\n$divider\n";
    }sxe;

    # Reformat table cells to appropriate width

    $table_original =~ s{
        # match each line
        (.*)
    }{
        my $line = $1;
        my $result = "";
        my $count = 0;

        # Now process them

        if (($line =~ /^\[/) && ($line !~ /\|/)) {
            # A caption line: copy it through untouched
            $result .= $line;
        } else {
            while ($line =~ /(\|?)\s*([^\|]+?)\s*(\|+|\Z)/gs) {
                my $opening = $1;
                my $cell = $2;
                my $ending = $3;
                my $lead = 0;           # Field width for the cell text
                my $trail = 0;          # Field width for the trailing pipes
                my $pad_lead = 0;
                my $pad_trail = 0;
                my $len = length($cell);        # Length of actual contents

                # Not all first column cells have a leading pipe
                if (($count > 0) || (length($opening) > 0)) {
                    $pad_lead = 1;
                }

                # Buffer before trailing pipe
                if (length($ending) > 0) {
                    $pad_trail = 1;
                }

                # How much space to fill? (account for multiple columns)
                my $width = 0;
                if ($ending =~ /\|/) {
                    $width = maxWidth($count, length($ending));
                } else {
                    $width = maxWidth($count, 1);
                }

                my $alignment = defined($alignments[$count]) ? $alignments[$count] : "";

                if (($alignment eq "right") || ($alignment eq "center")) {
                    # These need a leading pipe on the first column
                    if (($count == 0) && ($opening eq "")) {
                        $opening = "|";
                        $pad_lead = 1;
                        $width++;
                    }
                    if ($alignment eq "center") {
                        # Divide padding space
                        my $pad_total = $width - $len;
                        $pad_lead = int($pad_total/2)+1;
                        $pad_trail = $pad_total - $pad_lead;
                    }
                    $trail = $pad_trail+length($ending);
                    $lead = $width - $trail - length($opening);
                } else {
                    # Left, unspecified, and anything that cannot be
                    # honoured in plain text such as char, is laid out as
                    # left aligned instead of reusing the padding of the
                    # previous cell.
                    $lead = $len + $pad_lead;
                    $trail = $width - $lead - length($opening);
                }

                # Both fields are right justified: the cell text against
                # its padding, the pipes against the end of the column
                $result .= $opening . sprintf("%*s", $lead, $cell) . sprintf("%*s", $trail, $ending);

                if ($ending =~ /\|\|/) {
                    $count += (length($ending));
                } else {
                    $count++;
                }
            }
        }

        $result;
    }xmge;

    $table_original;
}xsge;


print $text;


# maxWidth($start_col, $cols)
#
# Return the total width for a range of columns: the sum of the %max_width
# entries for $cols consecutive columns beginning at $start_col.  A column
# with no recorded width counts as 0; %max_width itself is not modified.
sub maxWidth {
    my ($start_col, $cols) = @_;
    my $total = 0;

    for my $col ($start_col .. $start_col + $cols - 1) {
        $total += $max_width{$col} || 0;
    }

    return $total;
}

# setWidth($start_col, $cell)
#
# Set widths for column(s) based on cell contents.
#
#   $start_col - index of the first column the cell occupies
#   $cell      - the raw cell text, including any leading pipe and all of
#                its trailing pipes
#
# Updates %max_width in place and returns nothing.  A cell ending in a
# single pipe (or none) only ever raises the width of $start_col.  A cell
# ending in two or more pipes spans one column per pipe; when the spanned
# columns together are narrower than the cell, they are widened to fit.
sub setWidth {
    my ($start_col, $cell) = @_;

    # Nothing to measure when the text does not look like a cell
    my ($opening, $contents, $closing) =
        $cell =~ /(\|?)\s*([^\|]+?)\s*(\|+|\Z)/
        or return;

    my $padding = 0;

    $padding++ if (length($opening) > 0);   # For first cell
    $padding++ if ($start_col > 0);         # All cells except first definitely have an opening `|`
    $padding++ if (length($closing) > 0);

    # Removes the first `&`, the whitespace after it and any trailing
    # whitespace before measuring.  NOTE(review): the original author did
    # not remember the reason for this; kept unchanged.
    $contents =~ s/&\s*(.*?)\s*$/$1/;

    my $cell_length = length($contents) + $padding + length($opening) + length($closing);

    if ($closing !~ /\|\|/) {
        # Ordinary cell: only ever widen its own column
        my $current = $max_width{$start_col} || 0;
        $max_width{$start_col} = $cell_length if ($current < $cell_length);
        return;
    }

    # This cell spans multiple columns, one per trailing pipe
    my $cols = length($closing);
    my @span = ($start_col .. $start_col + $cols - 1);

    my $current_total = 0;
    $current_total += ($max_width{$_} || 0) for @span;

    return if ($current_total >= $cell_length);

    if ($current_total > 0) {
        # Proportionally divide extra space
        my $scale = $cell_length / $current_total;
        for my $col (@span) {
            $max_width{$col} = int(($max_width{$col} || 0) * $scale);
        }
    } else {
        # None of the spanned columns has a width yet, so there is no
        # proportion to keep (and scaling would divide by zero): share the
        # cell width evenly instead.
        my $share = int($cell_length / $cols);
        $max_width{$_} = $share for @span;
    }

    # Now find the amount lost from fractions, and add back to largest columns
    my $assigned = 0;
    $assigned += $max_width{$_} for @span;
    my $missing = $cell_length - $assigned;

    foreach my $a_col (sort { $max_width{$b} <=> $max_width{$a} } @span) {
        last if ($missing <= 0);
        $max_width{$a_col}++;
        $missing--;
    }

    return;
}
