--  s-wchcnv.adb   [plain text]


------------------------------------------------------------------------------
--                                                                          --
--                         GNAT RUNTIME COMPONENTS                          --
--                                                                          --
--                       S Y S T E M . W C H _ C N V                        --
--                                                                          --
--                                 B o d y                                  --
--                                                                          --
--                                                                          --
--          Copyright (C) 1992-2001 Free Software Foundation, Inc.          --
--                                                                          --
-- GNAT is free software;  you can  redistribute it  and/or modify it under --
-- terms of the  GNU General Public License as published  by the Free Soft- --
-- ware  Foundation;  either version 2,  or (at your option) any later ver- --
-- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
-- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
-- or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License --
-- for  more details.  You should have  received  a copy of the GNU General --
-- Public License  distributed with GNAT;  see file COPYING.  If not, write --
-- to  the Free Software Foundation,  59 Temple Place - Suite 330,  Boston, --
-- MA 02111-1307, USA.                                                      --
--                                                                          --
-- As a special exception,  if other files  instantiate  generics from this --
-- unit, or you link  this unit with other files  to produce an executable, --
-- this  unit  does not  by itself cause  the resulting  executable  to  be --
-- covered  by the  GNU  General  Public  License.  This exception does not --
-- however invalidate  any other reasons why  the executable file  might be --
-- covered by the  GNU Public License.                                      --
--                                                                          --
-- GNAT was originally developed  by the GNAT team at  New York University. --
-- Extensive contributions were provided by Ada Core Technologies Inc.      --
--                                                                          --
------------------------------------------------------------------------------

--  This package contains generic subprograms used for converting between
--  sequences of Character and Wide_Character. All access to wide character
--  sequences is isolated in this unit.

with Interfaces;     use Interfaces;
with System.WCh_Con; use System.WCh_Con;
with System.WCh_JIS; use System.WCh_JIS;

package body System.WCh_Cnv is

   --------------------------------
   -- Char_Sequence_To_Wide_Char --
   --------------------------------

   function Char_Sequence_To_Wide_Char
     (C    : Character;
      EM   : WC_Encoding_Method)
      return Wide_Character
   is
      --  Decode one wide character. C is the first character of the
      --  sequence (already read by the caller) and EM selects the encoding
      --  method. Any further characters that belong to the sequence are
      --  obtained, in order, by calling In_Char. Constraint_Error is raised
      --  explicitly when the sequence is not well formed for the hex, UTF-8
      --  and brackets encodings.

      Unchanged : constant Wide_Character :=
                    Wide_Character'Val (Character'Pos (C));
      --  Result when C does not introduce a multi-character sequence: the
      --  wide character with the same position number as C.

      Acc : Integer := 0;
      --  Value accumulated from the hex digits processed so far

      Next : Character;
      --  Character read ahead while decoding the brackets notation

      Lead : Unsigned_16;
      --  Position number of C, used for the UTF-8 bit tests

      Code : Unsigned_16;
      --  UTF-8 result under construction

      procedure Add_Hex_Digit (Digit : Character);
      --  If Digit is a hex digit (either case), set Acc to 16 * Acc plus
      --  the value of Digit. Otherwise raise Constraint_Error.

      function Continuation_Bits return Unsigned_16;
      --  Read one character with In_Char. If it has the UTF-8 continuation
      --  form 2#10xxxxxx#, return its low six bits, otherwise raise
      --  Constraint_Error.

      -------------------
      -- Add_Hex_Digit --
      -------------------

      procedure Add_Hex_Digit (Digit : Character) is
         Pos : constant Integer := Character'Pos (Digit);

      begin
         case Digit is
            when '0' .. '9' =>
               Acc := Acc * 16 + Pos - Character'Pos ('0');

            when 'A' .. 'F' =>
               Acc := Acc * 16 + Pos - (Character'Pos ('A') - 10);

            when 'a' .. 'f' =>
               Acc := Acc * 16 + Pos - (Character'Pos ('a') - 10);

            when others =>
               raise Constraint_Error;
         end case;
      end Add_Hex_Digit;

      -----------------------
      -- Continuation_Bits --
      -----------------------

      function Continuation_Bits return Unsigned_16 is
         Trail : constant Unsigned_16 :=
                   Unsigned_16 (Character'Pos (In_Char));

      begin
         if (Trail and 16#C0#) = 16#80# then
            return Trail and 16#3F#;
         else
            raise Constraint_Error;
         end if;
      end Continuation_Bits;

   --  Start of processing for Char_Sequence_To_Wide_Char

   begin
      case EM is

         --  Hex: ESC followed by exactly four hex digits, anything else
         --  stands for itself.

         when WCEM_Hex =>
            if C = ASCII.ESC then
               for J in 1 .. 4 loop
                  Add_Hex_Digit (In_Char);
               end loop;

               return Wide_Character'Val (Acc);
            end if;

            return Unchanged;

         --  Upper half: a character above DEL is the high byte and the
         --  next character is the low byte.

         when WCEM_Upper =>
            if C <= ASCII.DEL then
               return Unchanged;
            end if;

            return
              Wide_Character'Val
                (Integer (256 * Character'Pos (C)) +
                 Character'Pos (In_Char));

         --  Shift-JIS: a character above DEL starts a two character
         --  sequence that is handed to Shift_JIS_To_JIS.

         when WCEM_Shift_JIS =>
            if C <= ASCII.DEL then
               return Unchanged;
            end if;

            return Shift_JIS_To_JIS (C, In_Char);

         --  EUC: a character above DEL starts a two character sequence
         --  that is handed to EUC_To_JIS.

         when WCEM_EUC =>
            if C <= ASCII.DEL then
               return Unchanged;
            end if;

            return EUC_To_JIS (C, In_Char);

         --  UTF-8, two and three byte forms only:
         --    16#0080#-16#07ff#: 2#110xxxxx# 2#10xxxxxx#
         --    16#0800#-16#ffff#: 2#1110xxxx# 2#10xxxxxx# 2#10xxxxxx#

         when WCEM_UTF8 =>
            if C <= ASCII.DEL then
               return Unchanged;
            end if;

            Lead := Unsigned_16 (Character'Pos (C));

            if (Lead and 16#E0#) = 16#C0# then

               --  Two byte form: five bits from C, six from the next byte

               Code := Shift_Left (Lead and 16#1F#, 6);
               Code := Code or Continuation_Bits;

            elsif (Lead and 16#F0#) = 16#E0# then

               --  Three byte form: four bits from C, then six bits from
               --  each of the two following bytes, read in sequence.

               Code := Shift_Left (Lead and 16#0F#, 12);
               Code := Code or Shift_Left (Continuation_Bits, 6);
               Code := Code or Continuation_Bits;

            else
               raise Constraint_Error;
            end if;

            return Wide_Character'Val (Code);

         --  Brackets: ["xx"] or ["xxxx"] where x is a hex digit, anything
         --  not starting with a left bracket stands for itself.

         when WCEM_Brackets =>
            if C /= '[' then
               return Unchanged;
            end if;

            if In_Char /= '"' then
               raise Constraint_Error;
            end if;

            Add_Hex_Digit (In_Char);
            Add_Hex_Digit (In_Char);
            Next := In_Char;

            --  After two digits we have either the closing quote or the
            --  first of two further digits.

            if Next /= '"' then
               Add_Hex_Digit (Next);
               Add_Hex_Digit (In_Char);

               if In_Char /= '"' then
                  raise Constraint_Error;
               end if;
            end if;

            if In_Char /= ']' then
               raise Constraint_Error;
            end if;

            return Wide_Character'Val (Acc);

      end case;
   end Char_Sequence_To_Wide_Char;

   --------------------------------
   -- Wide_Char_To_Char_Sequence --
   --------------------------------

   procedure Wide_Char_To_Char_Sequence
     (WC : Wide_Character;
      EM : WC_Encoding_Method)
   is
      --  Encode the wide character WC using encoding method EM. The
      --  resulting characters are emitted one at a time, in order, by
      --  calling Out_Char. Constraint_Error is raised explicitly for the
      --  upper half encoding when WC is in the range 16#0080#-16#7FFF#.

      Code : constant Natural := Wide_Character'Pos (WC);
      --  Position number of the wide character being encoded

      Hex_Digit : constant array (0 .. 15) of Character :=
                    "0123456789ABCDEF";
      --  Upper case hex digit for each four bit value

      Byte_1 : Character;
      Byte_2 : Character;
      --  The two characters returned by the JIS conversion routines

      Bits : Unsigned_16;
      --  Code as a modular value, for the UTF-8 bit manipulation

      procedure Put_Continuation (Source : Unsigned_16);
      --  Output a UTF-8 continuation byte 2#10xxxxxx# carrying the low six
      --  bits of Source.

      procedure Put_Hex_Digits;
      --  Output Code as four upper case hex digits, most significant first

      ----------------------
      -- Put_Continuation --
      ----------------------

      procedure Put_Continuation (Source : Unsigned_16) is
      begin
         Out_Char (Character'Val (16#80# or (Source and 16#3F#)));
      end Put_Continuation;

      --------------------
      -- Put_Hex_Digits --
      --------------------

      procedure Put_Hex_Digits is
      begin
         --  Code is at most 16#FFFF#, so each quotient reduced mod 16 is
         --  the hex digit at the corresponding place.

         for Place in reverse 0 .. 3 loop
            Out_Char (Hex_Digit ((Code / 16 ** Place) mod 16));
         end loop;
      end Put_Hex_Digits;

   --  Start of processing for Wide_Char_To_Char_Sequence

   begin
      case EM is

         --  Hex: ESC followed by four hex digits for codes above 255

         when WCEM_Hex =>
            if Code >= 256 then
               Out_Char (ASCII.ESC);
               Put_Hex_Digits;
            else
               Out_Char (Character'Val (Code));
            end if;

         --  Upper half: high byte then low byte for codes whose high byte
         --  is in the upper half; other codes above 127 are not
         --  representable.

         when WCEM_Upper =>
            if Code >= 16#8000# then
               Out_Char (Character'Val (Code / 256));
               Out_Char (Character'Val (Code mod 256));

            elsif Code >= 128 then
               raise Constraint_Error;

            else
               Out_Char (Character'Val (Code));
            end if;

         --  Shift-JIS: two characters obtained from JIS_To_Shift_JIS

         when WCEM_Shift_JIS =>
            if Code >= 128 then
               JIS_To_Shift_JIS (WC, Byte_1, Byte_2);
               Out_Char (Byte_1);
               Out_Char (Byte_2);
            else
               Out_Char (Character'Val (Code));
            end if;

         --  EUC: two characters obtained from JIS_To_EUC

         when WCEM_EUC =>
            if Code >= 128 then
               JIS_To_EUC (WC, Byte_1, Byte_2);
               Out_Char (Byte_1);
               Out_Char (Byte_2);
            else
               Out_Char (Character'Val (Code));
            end if;

         --  UTF-8:
         --    16#0000#-16#007f#: 2#0xxxxxxx#
         --    16#0080#-16#07ff#: 2#110xxxxx# 2#10xxxxxx#
         --    16#0800#-16#ffff#: 2#1110xxxx# 2#10xxxxxx# 2#10xxxxxx#

         when WCEM_UTF8 =>
            Bits := Unsigned_16 (Code);

            if Bits < 16#80# then
               Out_Char (Character'Val (Bits));

            else
               --  Lead byte (plus the middle byte of the three byte form)

               if Bits < 16#0800# then
                  Out_Char (Character'Val (16#C0# or Shift_Right (Bits, 6)));
               else
                  Out_Char (Character'Val (16#E0# or Shift_Right (Bits, 12)));
                  Put_Continuation (Shift_Right (Bits, 6));
               end if;

               --  Both multi-byte forms end with the low six bits

               Put_Continuation (Bits);
            end if;

         --  Brackets: ["xxxx"] for codes above 255

         when WCEM_Brackets =>
            if Code >= 256 then
               Out_Char ('[');
               Out_Char ('"');
               Put_Hex_Digits;
               Out_Char ('"');
               Out_Char (']');
            else
               Out_Char (Character'Val (Code));
            end if;
      end case;
   end Wide_Char_To_Char_Sequence;

end System.WCh_Cnv;