SourceCode/Document

Development Platform:
Unix_Linux

iconv.c：Code Content
							/* Copyright (C) 1999-2000 Free Software Foundation, Inc.
   This file is part of the GNU ICONV Library.
   The GNU ICONV Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.
   The GNU ICONV Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.
   You should have received a copy of the GNU Library General Public
   License along with the GNU ICONV Library; see the file COPYING.LIB.  If not,
   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.  */
#include <iconv.h>
#include <stdlib.h>
#include <string.h>
#include "config.h"
/*
 * Converters.
 */
#include "converters.h"
/*
 * Transliteration tables.
 */
#include "cjk_variants.h"
#include "translit.h"
/*
 * Table of all supported encodings.
 */
struct encoding {
  /* Function set converting multibyte input to unicode. */
  struct mbtowc_funcs ifuncs;
  /* Function set converting unicode to multibyte output. */
  struct wctomb_funcs ofuncs;
  /* Flags applying to the unicode -> multibyte direction. */
  int oflags;
};
enum {
#define DEFENCODING(xxx_names,xxx,xxx_ifuncs,xxx_ofuncs1,xxx_ofuncs2) 
  ei_##xxx ,
#include "encodings.def"
#undef DEFENCODING
ei_for_broken_compilers_that_dont_like_trailing_commas
};
#include "flags.h"
static struct encoding const all_encodings[] = {
#define DEFENCODING(xxx_names,xxx,xxx_ifuncs,xxx_ofuncs1,xxx_ofuncs2) 
  { xxx_ifuncs, xxx_ofuncs1,xxx_ofuncs2, ei_##xxx##_oflags },
#include "encodings.def"
#undef DEFENCODING
};
/*
 * Alias lookup function.
 * Defines
 *   struct alias { const char* name; unsigned int encoding_index; };
 *   const struct alias * aliases_lookup (const char *str, unsigned int len);
 *   #define MAX_WORD_LENGTH ...
 */
#include "aliases.h"
#if 0
/* Case-insensitive equality test for two ASCII strings, i.e. the logical
   negation of strcasecmp. Only the second string is folded to upper case;
   the first string is expected to be upper case already.
   Returns 1 if the strings match, 0 otherwise. */
static int strequal (const char* str1, const char* str2)
{
  const unsigned char* lhs = (const unsigned char *) str1;
  const unsigned char* rhs = (const unsigned char *) str2;
  unsigned char a;
  unsigned char b;
  do {
    a = *lhs++;
    b = *rhs++;
    /* At the end of str1 the raw byte of str2 decides the result. */
    if (a == 0)
      break;
    if (b >= 'a' && b <= 'z')
      b -= 'a'-'A';
  } while (a == b);
  return (a == b);
}
#endif
iconv_t iconv_open (const char* tocode, const char* fromcode)
{
  struct conv_struct * cd = (struct conv_struct *) malloc(sizeof(struct conv_struct));
  char buf[MAX_WORD_LENGTH+1];
  const char* cp;
  char* bp;
  const struct alias * ap;
  unsigned int count;
  if (cd == NULL) {
    errno = ENOMEM;
    return (iconv_t)(-1);
  }
  /* Before calling aliases_lookup, convert the input string to upper case,
   * and check whether it's entirely ASCII (we call gperf with option "-7"
   * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
   * or if it's too long, it is not a valid encoding name.
   */
  /* Search tocode in the table. */
  for (cp = tocode, bp = buf, count = MAX_WORD_LENGTH+1; ; cp++, bp++) {
    unsigned char c = * (unsigned char *) cp;
    if (c >= 0x80)
      goto invalid;
    if (c >= 'a' && c <= 'z')
      c -= 'a'-'A';
    *bp = c;
    if (c == '')
      break;
    if (--count == 0)
      goto invalid;
  }
  ap = aliases_lookup(buf,bp-buf);
  if (ap == NULL)
    goto invalid;
  cd->oindex = ap->encoding_index;
  cd->ofuncs = all_encodings[ap->encoding_index].ofuncs;
  cd->oflags = all_encodings[ap->encoding_index].oflags;
  /* Search fromcode in the table. */
  for (cp = fromcode, bp = buf, count = MAX_WORD_LENGTH+1; ; cp++, bp++) {
    unsigned char c = * (unsigned char *) cp;
    if (c >= 0x80)
      goto invalid;
    if (c >= 'a' && c <= 'z')
      c -= 'a'-'A';
    *bp = c;
    if (c == '')
      break;
    if (--count == 0)
      goto invalid;
  }
  ap = aliases_lookup(buf,bp-buf);
  if (ap == NULL)
    goto invalid;
  cd->iindex = ap->encoding_index;
  cd->ifuncs = all_encodings[ap->encoding_index].ifuncs;
  /* Initialize the states. */
  memset(&cd->istate,'',sizeof(state_t));
  memset(&cd->ostate,'',sizeof(state_t));
  /* Initialize the operation flags. */
  cd->transliterate = 1;
  /* Done. */
  return (iconv_t)cd;
invalid:
  errno = EINVAL;
  return (iconv_t)(-1);
}
/* Converts input bytes to the output encoding of descriptor ICD.
 *
 * If INBUF or *INBUF is NULL, no input is converted:
 *   - if OUTBUF or *OUTBUF is NULL too, both conversion states are zeroed;
 *   - otherwise the output converter's xxx_reset function, if it has one,
 *     is called to write to *OUTBUF first (errno = E2BIG and return -1 if it
 *     reports a negative count), and then both states are zeroed.
 *   In these cases the return value is 0 on success.
 *
 * Otherwise characters are converted one at a time until *INBYTESLEFT bytes
 * are consumed or an error occurs. *INBUF, *INBYTESLEFT, *OUTBUF and
 * *OUTBYTESLEFT are updated to reflect what was consumed and produced; a
 * character that triggers an error is not consumed.
 *
 * Returns the number of characters that the output converter could not
 * encode directly (they were transliterated or replaced by U+FFFD), or
 * (size_t)(-1) with errno set to
 *   EILSEQ  invalid input, or a character that cannot be output at all,
 *   EINVAL  incomplete input sequence at the end of the input,
 *   E2BIG   not enough room in the output buffer.
 */
size_t iconv (iconv_t icd,
              ICONV_CONST char* * inbuf, size_t *inbytesleft,
              char* * outbuf, size_t *outbytesleft)
{
  conv_t cd = (conv_t) icd;
  if (inbuf == NULL || *inbuf == NULL) {
    if (outbuf == NULL || *outbuf == NULL) {
      /* Reset the states. */
      memset(&cd->istate,'\0',sizeof(state_t));
      memset(&cd->ostate,'\0',sizeof(state_t));
      return 0;
    } else {
      /* Let the output converter write to *outbuf before the reset. */
      if (cd->ofuncs.xxx_reset) {
        int outcount = cd->ofuncs.xxx_reset(cd,*outbuf,*outbytesleft);
        if (outcount < 0) {
          errno = E2BIG;
          return -1;
        }
        *outbuf += outcount; *outbytesleft -= outcount;
      }
      memset(&cd->istate,'\0',sizeof(state_t));
      memset(&cd->ostate,'\0',sizeof(state_t));
      return 0;
    }
  } else {
    size_t result = 0;
    const unsigned char* inptr = (const unsigned char*) *inbuf;
    size_t inleft = *inbytesleft;
    unsigned char* outptr = (unsigned char*) *outbuf;
    size_t outleft = *outbytesleft;
    while (inleft > 0) {
      wchar_t wc;
      int incount;
      int outcount;
      incount = cd->ifuncs.xxx_mbtowc(cd,&wc,inptr,inleft);
      if (incount <= 0) {
        if (incount == 0) {
          /* Case 1: invalid input */
          errno = EILSEQ;
          result = -1;
          break;
        }
        if (incount == -1) {
          /* Case 2: not enough bytes available to detect anything */
          errno = EINVAL;
          result = -1;
          break;
        }
        /* Case 3: k bytes read, but only a shift sequence */
        incount = -1-incount;
      } else {
        /* Case 4: k bytes read, making up a wide character */
        outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
        if (outcount != 0)
          goto outcount_ok;
        /* Try transliteration. */
        result++;
        if (cd->transliterate) {
          if (cd->oflags & HAVE_HANGUL_JAMO) {
            /* Decompose Hangul into Jamo. Use double-width Jamo (contained
               in all Korean encodings and ISO-2022-JP-2), not half-width Jamo
               (contained in Unicode only). */
            wchar_t buf[3];
            int ret = johab_hangul_decompose(cd,buf,wc);
            if (ret != RET_ILSEQ) {
              /* we know 1 <= ret <= 3 */
              /* Remember the output position so that a partially written
                 decomposition can be rolled back. */
              state_t backup_state = cd->ostate;
              unsigned char* backup_outptr = outptr;
              size_t backup_outleft = outleft;
              int i, sub_outcount;
              for (i = 0; i < ret; i++) {
                if (outleft == 0) {
                  sub_outcount = RET_TOOSMALL;
                  goto johab_hangul_failed;
                }
                sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
                if (sub_outcount <= 0)
                  goto johab_hangul_failed;
                if (!(sub_outcount <= outleft)) abort();
                outptr += sub_outcount; outleft -= sub_outcount;
              }
              goto char_done;
            johab_hangul_failed:
              cd->ostate = backup_state;
              outptr = backup_outptr;
              outleft = backup_outleft;
              if (sub_outcount < 0) {
                errno = E2BIG;
                result = -1;
                break;
              }
            }
          }
          {
            /* Try to use a variant, but postfix it with
               U+303E IDEOGRAPHIC VARIATION INDICATOR
               (cf. Ken Lunde's "CJKV information processing", p. 188). */
            int indx = -1;
            if (wc == 0x3006)
              indx = 0;
            else if (wc == 0x30f6)
              indx = 1;
            else if (wc >= 0x4e00 && wc < 0xa000)
              indx = cjk_variants_indx[wc-0x4e00];
            if (indx >= 0) {
              for (;; indx++) {
                wchar_t buf[2];
                unsigned short variant = cjk_variants[indx];
                /* The top bit marks the last variant of this character. */
                unsigned short last = variant & 0x8000;
                variant &= 0x7fff;
                variant += 0x3000;
                buf[0] = variant; buf[1] = 0x303e;
                {
                  state_t backup_state = cd->ostate;
                  unsigned char* backup_outptr = outptr;
                  size_t backup_outleft = outleft;
                  int i, sub_outcount;
                  for (i = 0; i < 2; i++) {
                    if (outleft == 0) {
                      sub_outcount = RET_TOOSMALL;
                      goto variant_failed;
                    }
                    sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
                    if (sub_outcount <= 0)
                      goto variant_failed;
                    if (!(sub_outcount <= outleft)) abort();
                    outptr += sub_outcount; outleft -= sub_outcount;
                  }
                  goto char_done;
                variant_failed:
                  cd->ostate = backup_state;
                  outptr = backup_outptr;
                  outleft = backup_outleft;
                  if (sub_outcount < 0) {
                    errno = E2BIG;
                    result = -1;
                    /* A plain "break" here would only leave the variant
                       loop and let the conversion carry on after the
                       error; leave the whole conversion loop instead. */
                    goto conversion_stopped;
                  }
                }
                if (last)
                  break;
              }
            }
          }
          if (wc >= 0x2018 && wc <= 0x201a) {
            /* Special case for quotation marks 0x2018, 0x2019, 0x201a */
            wchar_t substitute =
              (cd->oflags & HAVE_QUOTATION_MARKS
               ? (wc == 0x201a ? 0x2018 : wc)
               : (cd->oflags & HAVE_ACCENTS
                  ? (wc==0x2019 ? 0x00b4 : 0x0060) /* use accents */
                  : 0x0027 /* use apostrophe */
              )  );
            outcount = cd->ofuncs.xxx_wctomb(cd,outptr,substitute,outleft);
            if (outcount != 0)
              goto outcount_ok;
          }
          {
            /* Use the transliteration table. */
            int indx = translit_index(wc);
            if (indx >= 0) {
              /* The entry starts with the number of replacement characters,
                 followed by the replacement characters themselves. */
              const unsigned char * cp = &translit_data[indx];
              unsigned int num = *cp++;
              state_t backup_state = cd->ostate;
              unsigned char* backup_outptr = outptr;
              size_t backup_outleft = outleft;
              unsigned int i;
              int sub_outcount;
              for (i = 0; i < num; i++) {
                if (outleft == 0) {
                  sub_outcount = RET_TOOSMALL;
                  goto translit_failed;
                }
                sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,cp[i],outleft);
                if (sub_outcount <= 0)
                  goto translit_failed;
                if (!(sub_outcount <= outleft)) abort();
                outptr += sub_outcount; outleft -= sub_outcount;
              }
              goto char_done;
            translit_failed:
              cd->ostate = backup_state;
              outptr = backup_outptr;
              outleft = backup_outleft;
              if (sub_outcount < 0) {
                errno = E2BIG;
                result = -1;
                break;
              }
            }
          }
        }
        /* Last resort: output U+FFFD in place of the character. */
        outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
        if (outcount != 0)
          goto outcount_ok;
        errno = EILSEQ;
        result = -1;
        break;
      outcount_ok:
        if (outcount < 0) {
          errno = E2BIG;
          result = -1;
          break;
        }
        if (!(outcount <= outleft)) abort();
        outptr += outcount; outleft -= outcount;
      char_done:
        ;
      }
      if (!(incount <= inleft)) abort();
      inptr += incount; inleft -= incount;
    }
  conversion_stopped:
    /* Report back how far the conversion got, on success and on error. */
    *inbuf = (ICONV_CONST char*) inptr;
    *inbytesleft = inleft;
    *outbuf = (char*) outptr;
    *outbytesleft = outleft;
    return result;
  }
}
/* Releases the memory of the conversion descriptor ICD.
   Always returns 0. */
int iconv_close (iconv_t icd)
{
  free((conv_t) icd);
  return 0;
}
#ifndef LIBICONV_PLUG
/* Queries or changes a property of the conversion descriptor ICD.
   REQUEST selects the operation, ARGUMENT points to an int:
     ICONV_TRIVIALP           stores 1 if the input and output encoding
                              indices are equal, 0 otherwise;
     ICONV_GET_TRANSLITERATE  stores the current transliteration flag;
     ICONV_SET_TRANSLITERATE  sets the flag to 1 if the int is nonzero,
                              to 0 otherwise.
   Returns 0 on success; for any other REQUEST returns -1 with errno set
   to EINVAL. */
int iconvctl (iconv_t icd, int request, void* argument)
{
  conv_t cd = (conv_t) icd;
  if (request == ICONV_TRIVIALP) {
    int* answer = (int *) argument;
    *answer = (cd->iindex == cd->oindex);
    return 0;
  }
  if (request == ICONV_GET_TRANSLITERATE) {
    int* answer = (int *) argument;
    *answer = cd->transliterate;
    return 0;
  }
  if (request == ICONV_SET_TRANSLITERATE) {
    const int* wanted = (const int *) argument;
    cd->transliterate = (*wanted != 0);
    return 0;
  }
  /* Unknown request. */
  errno = EINVAL;
  return -1;
}
#endif