#!/usr/local/bin/perl

# ucoverage - 1998-05-13 - Roman czyborra@cs.tu-berlin.de
# lists the coverage of an *-iso10646-1 BDF font or a Unicode mapping file

# I use this script with the following make rules:
# coverage.%: %.bdf /usr/bin/ucoverage
# 	ucoverage $< > $@ 
# %.bdf: %.ttf
# 	ttf2bdf -v -r 75 -p 10 $< > $@ || :

# Load the block table stored after __END__.  Each record is split on
# ";" (plus any following whitespace) or on the newline into three fields:
# first code point, last code point, block name.  Blocks are numbered
# from 1, and $block ends up holding the number of records read.

while (defined(my $record = <DATA>))
{
    my ($lo, $hi, $title) = split /;\s*|\n/, $record;

    $block++;
    $count[$block] = 0;
    $first[$block] = $lo;
    $last[$block]  = $hi;
    $name[$block]  = $title;
}

# Then go counting the characters present in each block.
#
# Input comes from the files named on the command line (or STDIN).  Two
# kinds of lines yield a code point:
#   * "ENCODING <decimal>" lines, as found in BDF fonts;
#   * outside BDF input (no STARTFONT line seen so far), four upper-case
#     hex digits at the start of the line or after "U", "U+" or "0x",
#     followed by a non-word character.
#
# Code points are compared with the block boundaries numerically.
# Comparing the hex strings with "le" misfiles anything above U+FFFF:
# "10000" sorts between "0FC0" and "109F", so U+10000 used to be counted
# in that block.

# Numeric block boundaries, converted once instead of once per input line.
my (@low, @high);
for my $i (1 .. $block)
{
    # A record lacking a boundary gets an empty range (low > high) so that
    # it can never contain a character.
    my $complete = defined $first[$i] && defined $last[$i];
    $low[$i]  = $complete ? hex $first[$i] : 1;
    $high[$i] = $complete ? hex $last[$i]  : 0;
}

while (<>)
{
    # Once a BDF header has been seen, only ENCODING lines are trusted.
    $BDF = 1 if /^STARTFONT/;

    my $code;
    $code = $1 + 0 if /^ENCODING\s+(\d+)\D/;
    $code = hex $2 if ! $BDF && /(^|[U][+]?|0x)([0-9A-F]{4})\W/;
    next unless defined $code;

    # warn sprintf ("%04X\n", $code);
    # next if $seen{$code}++;

    # Every block whose range contains the code point is credited.
    for my $i (1 .. $block)
    {
        ++$count[$i] if $low[$i] <= $code && $code <= $high[$i];
    }
}

# Finally report one line per block, in table order:
# the hit count, a tab, the block's U+first..U+last range, a colon, its name.

foreach my $i (1 .. $block)
{
    print "$count[$i]\tU+$first[$i]..U+$last[$i]:$name[$i]\n";
}

# The following table is derived from unicode/2.0/unidata/blocks.txt
# and http://www.unicode.org/unicode/alloc/Pipeline.html (1998-03-09)

__END__
0000; 007F; Basic Latin
0080; 00FF; Latin-1 Supplement
0100; 017F; Latin Extended-A
0180; 024F; Latin Extended-B
0250; 02AF; IPA Extensions
02B0; 02FF; Spacing Modifier Letters
0300; 036F; Combining Diacritical Marks
0370; 03FF; Greek
0400; 04FF; Cyrillic
0500; 052F; ???
0530; 058F; Armenian
0590; 05FF; Hebrew
0600; 06FF; Arabic
0700; 074D; Syriac
074E; 077F; ???
0780; 07B1; Thaana
07B2; 08FF; ???
0900; 097F; Devanagari
0980; 09FF; Bengali
0A00; 0A7F; Gurmukhi
0A80; 0AFF; Gujarati
0B00; 0B7F; Oriya
0B80; 0BFF; Tamil
0C00; 0C7F; Telugu
0C80; 0CFF; Kannada
0D00; 0D7F; Malayalam
0D80; 0DFF; Sinhalese
0E00; 0E7F; Thai
0E80; 0EFF; Lao
0F00; 0FBF; Tibetan
0FC0; 109F; ???
10A0; 10FF; Georgian
1100; 11FF; Hangul Jamo
1200; 137F; Ethiopic
1380; 139F; ???
13A0; 13FF; Cherokee
1400; 167F; Canadian Syllabics
1680; 169F; Ogham
16A0; 16FF; Runic
1700; 1759; Burmese
175A; 177F; ???
1780; 17E9; Khmer
17EA; 1DFF; ???
1E00; 1EFF; Latin Extended Additional
1F00; 1FFF; Greek Extended
2000; 206F; General Punctuation
2070; 209F; Superscripts and Subscripts
20A0; 20CF; Currency Symbols
20D0; 20FF; Combining Marks for Symbols
2100; 214F; Letterlike Symbols
2150; 218F; Number Forms
2190; 21FF; Arrows
2200; 22FF; Mathematical Operators
2300; 23FF; Miscellaneous Technical
2400; 243F; Control Pictures
2440; 245F; Optical Character Recognition
2460; 24FF; Enclosed Alphanumerics
2500; 257F; Box Drawing
2580; 259F; Block Elements
25A0; 25FF; Geometric Shapes
2600; 26FF; Miscellaneous Symbols
2700; 27BF; Dingbats
27C0; 27FF; ???
2800; 28FF; Braille Pattern Symbols
2900; 2EFF; ???
2F00; 2FD5; KangXi radicals
2FD6; 2FFF; ???
3000; 303F; CJK Symbols and Punctuation
3040; 309F; Hiragana
30A0; 30FF; Katakana
3100; 312F; Bopomofo
3130; 318F; Hangul Compatibility Jamo
3190; 319F; Kanbun
31A0; 31FF; ???
3200; 32FF; Enclosed CJK Letters and Months
3300; 33FF; CJK Compatibility
3400; 4DFF; CJK Unified Ideographs, Extension A
4E00; 9FFF; CJK Unified Ideographs
A000; A4C8; Yi
A4C9; ABFF; ???
AC00; D7A3; Hangul Syllables
D7A4; D7FF; ???
D800; DB7F; High Surrogates
DB80; DBFF; High Private Use Surrogates
DC00; DFFF; Low Surrogates
E000; F8FF; Private Use
F900; FAFF; CJK Compatibility Ideographs
FB00; FB4F; Alphabetic Presentation Forms
FB50; FDFF; Arabic Presentation Forms-A
FE00; FE1F; ???
FE20; FE2F; Combining Half Marks
FE30; FE4F; CJK Compatibility Forms
FE50; FE6F; Small Form Variants
FE70; FEFF; Arabic Presentation Forms-B
FF00; FFEF; Halfwidth and Fullwidth Forms
FFF0; FFFF; Specials
