#!/usr/bin/perl
# parsegba.pl - parse gba input, write mys output
# GNU (C) Gaspar Sinai 
# Tokyo 2002-03-26
# Emit the fixed header of the generated map file on standard output:
# the NAME line, the COMM description lines, and the TYPE / SECTION /
# ENCODE / KEY_WIDTH / VALUE_WIDTH / KEY_LENGTH / VALUE_LENGTH settings.
# Everything up to the closing EOD marker is printed as written; the
# here-document interpolates, which is why the "@" in the e-mail address
# is escaped as "\@".  Lines starting with "#" inside the here-document
# are part of the output, not Perl comments.
# NOTE(review): rules 1a/1b in the COMM text look inverted relative to the
# loop further down, which treats values above 0xffff as four-byte codes -
# confirm before relying on that description.
print <<EOD;
NAME=gb18030.my
#------------------------------------------------------
COMM=Map sparse Unicode to GB18030 (2000-NOV) and back
COMM=It only has the synchronization points where 
COMM=output could be calculated by linear increment.
COMM=Gaspar Sinai <gsinai\@yudit.org> Tokyo 2002-03-26
COMM=
COMM=Here is the algorithm to get GB 18030 codes from this
COMM=garbage:
COMM=If input unicode characters is less than 0x80 emit that code as single byte
COMM=Search for closest lowest key that matches the unicode character:
COMM=
COMM=1) BMP
COMM= 1a If value is greater than 0xffff output will be two bytes:
COMM=    value + (unicode_vle - closest_key) 
COMM=  As always higher byte is emitted first.
COMM= 1b If value is less than 0x8000 output will be four bytes: 
COMM=    num = linear(value) + (unicode_vle - closest_key);
COMM=    num = nonlinear(num);
COMM=
COMM=   function nonlinear (num)
COMM=        k3 = (num % 10)+0x30; num = num / 10;
COMM=        k2 = (num % 126)+0x81; num = num / 126;
COMM=        k1 = (num % 10)+0x30; num = num / 10;
COMM=        k0 = (num % 126)+0x81; 
COMM=        return ((k0 << 24) + (k1 << 16) + (k2<<8) + k3);
COMM=
COMM=    function linear(value):
COMM=        k0 = (value >> 24) & 0xff; // 0x81..0xfe
COMM=        k1 = (value >> 16) & 0xff; // 0x30..0x39
COMM=        k2 = (value >> 8) & 0xff;   // 0x81..0xfe
COMM=        k3 = (value >> 0) & 0xff;   // 0x30..0x39
COMM=        num = (k0-0x81); num = num * 10; 
COMM=        num += (k1-0x30); num = num * 126; 
COMM=        num += (k2-0x81); num = num * 10; 
COMM=        num += (k3-0x30); 
COMM=        return (num);
COMM=
COMM=2) NON-BMP (unicode_value between 0x10000..0x10FFFF)
COMM=    num = unicode_value - 0x10000 + 0x2E248;
COMM=    nonlinear (num);
COMM=   - 0x10000 should producce 0x90308130
COMM=   - 0x10FFFF should be  0xE3329A35
#------------------------------------------------------
TYPE=0
SECTION=encode
ENCODE=1
#
# key 1 for 16 bit (16-bitunicode)
# value 2 for 32 bit (4-byte-gb) values 
#
KEY_WIDTH=1
VALUE_WIDTH=2
KEY_LENGTH=0
VALUE_LENGTH=0
#
EOD
#
# Main loop: read "KEY VALUE" lines (four hex digits, white space, one to
# eight hex digits) from the files named on the command line or from
# standard input, and print only the synchronization points, i.e. the
# entries that cannot be derived from the previous mapped entry by
# incrementing both key and value by one.
#
# An entry is a synchronization point when
#   - its key does not directly follow the key of the previous mapped
#     entry (first entry, skipped line, gap in the input), or
#   - its value is not the successor of the previous value: plain +1 for
#     values up to 0xffff, incGB() for values above 0xffff.
# The key check matters because a reader of the table extrapolates
# value + (unicode - closest_key); without it a gap in the keys whose
# values happen to be consecutive would be extrapolated wrongly.
#
use strict;
use warnings;

my $last_key;          # key of the previous mapped entry; undef before the first
my $last_value = 0;    # value of the previous mapped entry

while (my $line = <>)
{
  # Lines that do not look like a mapping entry are ignored.
  next unless ($line =~ /^([0-9A-F]{4})\s+([0-9A-F]{1,8})/);
  my $key = hex($1);
  my $vle = hex($2);

  # Key 0 is always written out and never takes part in the run tracking.
  if ($key == 0)
  {
    printf("%04X -> %08X\n", $key, $vle);
    next;
  }

  # Entries with a zero value and keys below 0x80 are left out of the table.
  next if ($vle == 0 || $key < 0x80);

  my $expected    = ($vle > 0xffff) ? incGB($last_value) : $last_value + 1;
  my $key_follows = (defined($last_key) && $key == $last_key + 1);

  if (!$key_follows || $vle != $expected)
  {
    printf("%04X -> %08X\n", $key, $vle);
  }

  $last_key   = $key;
  $last_value = $vle;
}

# Closing entry for the last 16-bit key.
printf("%04X -> %08X\n", 0xFFFF, 0x8431A439);

# Can not fit - can be checked
#printf ("%04X -> %08X\n", 0x10000, 0x90308130);
#printf ("%04X -> %08X\n", 0x10FFFF, 0xE3329A35);

exit (0);

#
# increment a GB code
#
# Takes a packed GB code, turns it into its linear index with fromGB,
# adds one, and returns the result packed again with toGB.
#
sub
incGB
{
  my ($code) = @_;
  my $next = fromGB($code) + 1;
  return toGB($next);
}


#
# Convert linear code to GB
#
# Argument: a linear index as produced by fromGB.
# Returns:  the packed 32-bit code (k0 << 24) + (k1 << 16) + (k2 << 8) + k3,
#           where k0 and k2 are offset by 0x81 (radix 126) and k1 and k3
#           are offset by 0x30 (radix 10).
#
# The argument is copied first: the elements of @_ alias the caller's
# values, so working on $_[0] directly would overwrite the caller's
# variable and die on a literal argument.
#
sub
toGB
{
  my $num = $_[0];

  # Peel the digits off from least to most significant, using explicit
  # integer division between the steps.
  my $k3 = ($num % 10) + 0x30;
  $num = int($num / 10);
  my $k2 = ($num % 126) + 0x81;
  $num = int($num / 126);
  my $k1 = ($num % 10) + 0x30;
  $num = int($num / 10);
  my $k0 = ($num % 126) + 0x81;

  return (($k0 << 24) + ($k1 << 16) + ($k2 << 8) + $k3);
}


#
# Convert GB to linear code
#
# Argument: a packed 32-bit code; its four bytes are taken from the most
#           significant (bits 24..31) to the least significant (bits 0..7).
# Returns:  the linear index, built as a mixed-radix number: bytes one and
#           three are offset by 0x81, bytes two and four by 0x30, with
#           multipliers 10, 126 and 10 between them.
#
sub
fromGB
{
  my $code = $_[0];
  my @byte = map { ($code >> $_) & 0xff } (24, 16, 8, 0);

  my $linear = $byte[0] - 0x81;
  $linear = $linear * 10  + ($byte[1] - 0x30);
  $linear = $linear * 126 + ($byte[2] - 0x81);
  $linear = $linear * 10  + ($byte[3] - 0x30);
  return ($linear);
}