#!/bin/sh

# Convert ANSI (terminal) colours and attributes to HTML

# Author:
#    http://www.pixelbeat.org/docs/terminal_colours/
# Examples:
#    ls -l --color=always | ansi2html.sh > ls.html
#    git show --color | ansi2html.sh > last_change.html
#    Generally one can use the `script` util to capture full terminal output.
# Changes:
#    V0.1, 24 Apr 2008, Initial release
#    V0.2, 01 Jan 2009, Phil Harnish
#                         Support `git diff --color` output by
#                         matching ANSI codes that specify only
#                         bold or background colour.
#                       P@draigBrady.com
#                         Support `ls --color` output by stripping
#                         redundant leading 0s from ANSI codes.
#                         Support `grep --color=always` by stripping
#                         unhandled ANSI codes (specifically ^[[K).
#    V0.3, 20 Mar 2009, http://eexpress.blog.ubuntu.org.cn/
#                         Remove cat -v usage which mangled non ascii input.
#                         Cleanup regular expressions used.
#                         Support other attributes like reverse, ...
#                       P@draigBrady.com
#                         Correctly nest tags (even across lines).
#                         Add a command line option to use a dark background.
#                         Strip more terminal control codes.
#    V0.4, 17 Sep 2009, P@draigBrady.com
#                         Handle codes with combined attributes and color.
#                         Handle isolated attributes with css.
#                         Strip more terminal control codes.
#    V0.12, 12 Jul 2011
#      http://github.com/pixelb/scripts/commits/master/scripts/ansi2html.sh

if [ "$1" = "--version" ]; then
  echo "0.12" && exit
fi

if [ "$1" = "--help" ]; then
  echo "This utility converts ANSI codes in data passed to stdin" >&2
  echo "It has 2 optional parameters:" >&2
  echo " --bg=dark --palette=linux|solarized|tango|xterm" >&2
  echo "E.g.: ls -l --color=always | ansi2html.sh --bg=dark > ls.html" >&2
  exit
fi

# --bg=dark is accepted either before or after --palette=...
if [ "$1" = "--bg=dark" ]; then
  dark_bg=yes
  shift
fi

# P0-P7 are the normal and P8-P15 the bright variants of the 8 ANSI
# colours, as RRGGBB hex, for the selected palette.
if [ "$1" = "--palette=solarized" ]; then
  # See http://ethanschoonover.com/solarized
  P0=073642;  P1=D30102;  P2=859900;  P3=B58900;
  P4=268BD2;  P5=D33682;  P6=2AA198;  P7=EEE8D5;
  P8=002B36;  P9=CB4B16;  P10=586E75; P11=657B83;
  P12=839496; P13=6C71C4; P14=93A1A1; P15=FDF6E3;
  shift
elif [ "$1" = "--palette=solarized-xterm" ]; then
  # Above mapped onto the xterm 256 color palette
  P0=262626;  P1=AF0000;  P2=5F8700;  P3=AF8700;
  P4=0087FF;  P5=AF005F;  P6=00AFAF;  P7=E4E4E4;
  P8=1C1C1C;  P9=D75F00;  P10=585858; P11=626262;
  P12=808080; P13=5F5FAF; P14=8A8A8A; P15=FFFFD7;
  shift
elif [ "$1" = "--palette=tango" ]; then
  # Gnome default
  P0=000000;  P1=CC0000;  P2=4E9A06;  P3=C4A000;
  P4=3465A4;  P5=75507B;  P6=06989A;  P7=D3D7CF;
  P8=555753;  P9=EF2929;  P10=8AE234; P11=FCE94F;
  P12=729FCF; P13=AD7FA8; P14=34E2E2; P15=EEEEEC;
  shift
elif [ "$1" = "--palette=xterm" ]; then
  P0=000000;  P1=CD0000;  P2=00CD00;  P3=CDCD00;
  P4=0000EE;  P5=CD00CD;  P6=00CDCD;  P7=E5E5E5;
  P8=7F7F7F;  P9=FF0000;  P10=00FF00; P11=FFFF00;
  P12=5C5CFF; P13=FF00FF; P14=00FFFF; P15=FFFFFF;
  shift
else
  # linux console
  P0=000000;  P1=AA0000;  P2=00AA00;  P3=AA5500;
  P4=0000AA;  P5=AA00AA;  P6=00AAAA;  P7=AAAAAA;
  P8=555555;  P9=FF5555;  P10=55FF55; P11=FFFF55;
  P12=5555FF; P13=FF55FF; P14=55FFFF; P15=FFFFFF;
  if [ "$1" = "--palette=linux" ]; then
    shift
  fi
fi

if [ "$1" = "--bg=dark" ]; then
  dark_bg=yes
  shift
fi

# Default (SGR 39/49) colours and the rendering of bold on them depend on
# whether a dark background was requested.
if [ "$dark_bg" ]; then
  default_fg=$P7; default_bg=$P0; bold_fg=$P15; bold_weight=normal
else
  default_fg=$P0; default_bg=$P15; bold_fg=$P0; bold_weight=bold
fi

# Emit the document preamble and stylesheet.
# NOTE(review): the original preamble text was lost from this file; the
# markup below is a reconstruction built from the palette variables and
# should be confirmed against the upstream script.
#   .fN / .bN    basic foreground / background colour N (9 = default)
#   .efN / .ebN  256 colour foreground / background colour N
# Bright colours are expressed as a basic colour combined with .bold.
cat <<EOF
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
<style type="text/css">
.ef0,.f0 { color: #$P0; } .eb0,.b0 { background-color: #$P0; }
.ef1,.f1 { color: #$P1; } .eb1,.b1 { background-color: #$P1; }
.ef2,.f2 { color: #$P2; } .eb2,.b2 { background-color: #$P2; }
.ef3,.f3 { color: #$P3; } .eb3,.b3 { background-color: #$P3; }
.ef4,.f4 { color: #$P4; } .eb4,.b4 { background-color: #$P4; }
.ef5,.f5 { color: #$P5; } .eb5,.b5 { background-color: #$P5; }
.ef6,.f6 { color: #$P6; } .eb6,.b6 { background-color: #$P6; }
.ef7,.f7 { color: #$P7; } .eb7,.b7 { background-color: #$P7; }
.ef8, .f0 > .bold,.bold > .f0 { color: #$P8; font-weight: normal; }
.ef9, .f1 > .bold,.bold > .f1 { color: #$P9; font-weight: normal; }
.ef10,.f2 > .bold,.bold > .f2 { color: #$P10; font-weight: normal; }
.ef11,.f3 > .bold,.bold > .f3 { color: #$P11; font-weight: normal; }
.ef12,.f4 > .bold,.bold > .f4 { color: #$P12; font-weight: normal; }
.ef13,.f5 > .bold,.bold > .f5 { color: #$P13; font-weight: normal; }
.ef14,.f6 > .bold,.bold > .f6 { color: #$P14; font-weight: normal; }
.ef15,.f7 > .bold,.bold > .f7 { color: #$P15; font-weight: normal; }
.eb8  { background-color: #$P8; }
.eb9  { background-color: #$P9; }
.eb10 { background-color: #$P10; }
.eb11 { background-color: #$P11; }
.eb12 { background-color: #$P12; }
.eb13 { background-color: #$P13; }
.eb14 { background-color: #$P14; }
.eb15 { background-color: #$P15; }
EOF

# xterm 256 colour palette: a 6x6x6 colour cube occupies indexes 16-231.
red=0
while [ "$red" -lt 6 ]; do
  green=0
  while [ "$green" -lt 6 ]; do
    blue=0
    while [ "$blue" -lt 6 ]; do
      c=$((16 + red * 36 + green * 6 + blue))
      r=$(((red * 40 + 55) * (red > 0)))
      g=$(((green * 40 + 55) * (green > 0)))
      b=$(((blue * 40 + 55) * (blue > 0)))
      printf '.ef%d { color: #%02x%02x%02x; } ' "$c" "$r" "$g" "$b"
      printf '.eb%d { background-color: #%02x%02x%02x; }\n' \
        "$c" "$r" "$g" "$b"
      blue=$((blue + 1))
    done
    green=$((green + 1))
  done
  red=$((red + 1))
done

# ... followed by a 24 step grey ramp at indexes 232-255.
gray=0
while [ "$gray" -lt 24 ]; do
  c=$((gray + 232))
  l=$((gray * 10 + 8))
  printf '.ef%d { color: #%02x%02x%02x; } ' "$c" "$l" "$l" "$l"
  printf '.eb%d { background-color: #%02x%02x%02x; }\n' "$c" "$l" "$l" "$l"
  gray=$((gray + 1))
done

cat <<EOF

.f9 { color: #$default_fg; }
.b9 { background-color: #$default_bg; }
.f9 > .bold,.bold > .f9, body.f9 > pre > .bold {
  /* Bold is heavy black on white, or bright white
     depending on the default background */
  color: #$bold_fg;
  font-weight: $bold_weight;
}
.reverse {
  /* CSS does not support swapping fg and bg colours unfortunately,
     so just hardcode something that will look OK on all backgrounds. */
  color: #$P0; background-color: #$P7;
}
.underline { text-decoration: underline; }
.line-through { text-decoration: line-through; }
.blink { text-decoration: blink; }

</style>
</head>

<body class="f9 b9">
<pre>
EOF

# Regex fragment matching the start of an escape code: ESC followed by '['.
p='\x1b\['
# Regex matching a code that has been flagged with a leading '¡'.
# Group 1 captures everything from the start of the line up to the flag,
# and may not contain the '°' reset marker.
P='\(^[^°]*\)¡'"${p}"

# Stage 1 of the conversion pipeline: read the captured terminal output on
# stdin and remove control sequences, so that of the ${p}-introduced codes
# only those ending in 'm' (SGR) are passed down the pipe.
# In order, the sed script below removes: anything between ESC and the ST
# terminator, OSC sequences ended by BEL, text preceding a carriage return
# within a line, trailing carriage returns, BEL characters, other non SGR
# escape sequences, and finally each backspace together with the character
# before it (looping until no backspace pair is left).
# NOTE(review): the script uses \xHH, \a and \r escapes, which appear to
# require GNU sed - confirm before running with another sed.
# Handle various xterm control sequences.
# See /usr/share/doc/xterm-*/ctlseqs.txt
sed "
s#\x1b[^\x1b]*\x1b\\\##g  # strip anything between \e and ST
s#\x1b][0-9]*;[^\a]*\a##g # strip any OSC (xterm title etc.)

#handle carriage returns
s#^.*\r\{1,\}\([^$]\)#\1#
s#\r\$## # strip trailing \r

# strip other non SGR escape sequences
s#[\x07]##g
s#\x1b[]>=\][0-9;]*##g
s#\x1bP+.\{5\}##g
s#${p}[0-9;?]*[^0-9;?m]##g

#remove backspace chars and what they're backspacing over
:rm_bs
s#[^\x08]\x08##g; t rm_bs
" |

# Stage 2: normalize the input before transformation.
#  - Escape the characters that are special in HTML, so that the text
#    cannot be mistaken for (or inject) markup in the generated page.
#  - Split combined SGR codes into one code per sequence, in a canonical
#    form, so the next stage only has to match single codes.
#  - Replace each reset code with the single char '°' and flag every other
#    code with a leading '¡'.  Literal '°' and '¡' in the input are turned
#    into HTML entities first so they can not be confused with the flags.
sed "
# escape HTML
s#\&#\&amp;#g; s#>#\&gt;#g; s#<#\&lt;#g; s#\"#\&quot;#g

# normalize SGR codes a little

# split 256 colors out and mark so that they're not
# recognised by the following 'split combined' line
:e
s#${p}\([0-9;]\{1,\}\);\([34]8;5;[0-9]\{1,3\}\)m#${p}\1m${p}¬\2m#g; t e
s#${p}\([34]8;5;[0-9]\{1,3\}\)m#${p}¬\1m#g;

:c
s#${p}\([0-9]\{1,\}\);\([0-9;]\{1,\}\)m#${p}\1m${p}\2m#g; t c   # split combined
s#${p}0\([0-7]\)#${p}\1#g                                 #strip leading 0
s#${p}1m\(\(${p}[4579]m\)*\)#\1${p}1m#g                   #bold last (with clr)
s#${p}m#${p}0m#g                                          #add leading 0 to norm

# undo any 256 color marking
s#${p}¬\([34]8;5;[0-9]\{1,3\}\)m#${p}\1m#g;

# map 16 color codes to color + bold
s#${p}9\([0-7]\)m#${p}3\1m${p}1m#g;
s#${p}10\([0-7]\)m#${p}4\1m${p}1m#g;

# change 'reset' code to a single char, and prepend a single char to
# other codes so that we can easily do negative matching, as sed
# does not support look behind expressions etc.
s#°#\&deg;#g; s#${p}0m#°#g
s#¡#\&iexcl;#g; s#${p}[0-9;]*m#¡&#g
" |

# Stage 3: convert SGR sequences to HTML.
# Each flagged code ('¡' followed by the code) is replaced by an opening
# <span> carrying a CSS class, and one 's' is pushed onto the sed hold
# space per opened span.  Each reset marker ('°') is replaced by one
# </span> per recorded 's'.  The hold space is kept between input lines,
# so spans are closed correctly even when the reset is on a later line.
# Classes: fN/bN basic fore/background colour, efN/ebN 256 colour
# fore/background, and bold, underline, blink, reverse, line-through.
sed "
:ansi_to_span # replace ANSI codes with CSS classes
t ansi_to_span # hack so t commands below only apply to preceding s cmd

/^[^¡]*°/ { b span_end } # replace 'reset code' if no preceding code

# common combinations to minimise html (optional)
s#${P}3\([0-7]\)m¡${p}4\([0-7]\)m#\1<span class=\"f\2 b\3\">#;t span_count
s#${P}4\([0-7]\)m¡${p}3\([0-7]\)m#\1<span class=\"f\3 b\2\">#;t span_count

s#${P}1m#\1<span class=\"bold\">#;                            t span_count
s#${P}4m#\1<span class=\"underline\">#;                       t span_count
s#${P}5m#\1<span class=\"blink\">#;                           t span_count
s#${P}7m#\1<span class=\"reverse\">#;                         t span_count
s#${P}9m#\1<span class=\"line-through\">#;                    t span_count
s#${P}3\([0-9]\)m#\1<span class=\"f\2\">#;                    t span_count
s#${P}4\([0-9]\)m#\1<span class=\"b\2\">#;                    t span_count

s#${P}38;5;\([0-9]\{1,3\}\)m#\1<span class=\"ef\2\">#;        t span_count
s#${P}48;5;\([0-9]\{1,3\}\)m#\1<span class=\"eb\2\">#;        t span_count

s#${P}[0-9;]*m#\1#g; t ansi_to_span # strip unhandled codes

b # next line of input

# add a corresponding span end flag
:span_count
x; s/^/s/; x
b ansi_to_span

# replace 'reset code' with correct number of </span> tags
:span_end
x
/^s/ {
  s/^.//
  x
  s#°#</span>°#
  b span_end
}
x
s#°##
b ansi_to_span
" |

# Convert alternative character set
# Note we convert here, as if we do at start we have to worry about avoiding
# conversion of SGR codes etc., whereas doing here we only have to
# avoid conversions of stuff between &...; or <...>
#
# Note we could use sed to do this based around:
#   sed 'y/abcdefghijklmnopqrstuvwxyz{}`~/▒␉␌␍␊°±␤␋┘┐┌└┼⎺⎻─⎼⎽├┤┴┬│≤≥π£◆·/'
# However that would be very awkward as we need to only conv some input.
# The basic scheme that we do in the python script below is:
#  1. enable transliterate once ¡ char seen
#  2. disable once µ char seen (may be on diff line to ¡)
#  3. never transliterate between &; or <> chars
#
# This sed stage only plants the ¡ and µ flags.  A literal µ in the input
# is turned into an HTML entity first, so it can not be mistaken for the
# 'rmacs' flag.
sed "
# change 'smacs' and 'rmacs' to a single char so that we can easily do
# negative matching, as sed does not support look behind expressions etc.
# Note we don't use ° like above as that's part of the alternate charset.
s#\x1b(0#¡#g;
s#µ#\&micro;#g; s#\x1b(B#µ#g
" |
(
# Transliterate the text between the ¡ and µ flags to line drawing chars.
# The embedded script runs under both Python 2 and Python 3.  If no
# interpreter is installed, or it fails, the flags are simply stripped.
py=$(command -v python 2>/dev/null || command -v python3 2>/dev/null)
[ -n "$py" ] && "$py" -c "
# vim:fileencoding=utf8

import sys
import locale

encoding = locale.getpreferredencoding()

def text(s):
  # Plain literals are byte strings on Python 2 and text on Python 3.
  if isinstance(s, bytes):
    return s.decode('utf-8')
  return s

old = text('abcdefghijklmnopqrstuvwxyz{}\`~')
new = text('▒␉␌␍␊°±␤␋┘┐┌└┼⎺⎻─⎼⎽├┤┴┬│≤≥π£◆·')
table = dict((ord(o), n) for o, n in zip(old, new))
smacs = text('¡')
rmacs = text('µ')

# Use the underlying byte streams where they exist (Python 3), so that
# decoding and encoding are done explicitly with the locale encoding.
stdin = getattr(sys.stdin, 'buffer', sys.stdin)
stdout = getattr(sys.stdout, 'buffer', sys.stdout)

data = stdin.read()
try:
  chars = data.decode(encoding)
except UnicodeError:
  # The input can not be decoded, and stdin is already consumed, so the
  # sed fallback would have nothing to read.  Pass the data through,
  # dropping only the UTF-8 encoded flag chars.
  data = data.replace(b'\xc2\xa1', b'').replace(b'\xc2\xb5', b'')
  stdout.write(data)
  sys.exit(0)

(STANDARD, ALTERNATIVE, HTML_TAG, HTML_ENTITY) = (0, 1, 2, 3)

state = STANDARD
last_mode = STANDARD
out = []
for c in chars:
  if state == HTML_TAG:
    if c == '>':
      state = last_mode
  elif state == HTML_ENTITY:
    if c == ';':
      state = last_mode
  else:
    if c == '<':
      state = HTML_TAG
    elif c == '&':
      state = HTML_ENTITY
    elif c == smacs and state == STANDARD:
      state = ALTERNATIVE
      last_mode = ALTERNATIVE
      continue
    elif c == rmacs and state == ALTERNATIVE:
      state = STANDARD
      last_mode = STANDARD
      continue
    elif state == ALTERNATIVE:
      c = c.translate(table)
  out.append(c)
stdout.write(text('').join(out).encode(encoding))
" 2>/dev/null ||
sed 's/[¡µ]//g' # just strip alternative flag chars
)

# Close the <pre>, <body> and <html> elements of the generated document.
# NOTE(review): the closing tags were missing from this statement and have
# been reconstructed; confirm against the upstream script.
echo "</pre>
</body>
</html>"