/*****************************************************************************/
/* Author: Chen Chien-Hsun, Taipei, Taiwan, R.O.C.                           */
/* Email: frank63@ms5.hinet.net                                              */
/* License: GNU GPL                                                          */
/* Description: A transcoding software between EUC-GB2312-80 and Unicode.    */
/* Version: 1.0.0                                                            */
/*****************************************************************************/
 
#include "Python.h"
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Case-conversion helpers, defined at the end of this file.  Each one
 * returns a malloc()ed, converted copy of str (or NULL when str is NULL);
 * the argument itself is not modified. */
char *strlwr(char *str);
char *strupr(char *str);

/*
 * Classify the character that starts at s in a NUL-terminated byte string.
 *
 * Returns 2 for a double-byte sequence (lead byte 129..254 followed by a
 * trail byte 64..126 or 128..254), 1 for a single byte below 127, and 0 for
 * anything else.  Reading s[1] is safe: it is only inspected when s[0] is a
 * lead byte, in which case s[1] is at worst the terminating NUL.
 */
static int decodeCharWidth(const unsigned char *s)
{
 if (s[0] > 128 && s[0] < 255) {
   if ((s[1] > 63 && s[1] < 127) || (s[1] > 127 && s[1] < 255))
     return 2;
   return 0;
 }
 if (s[0] < 127)
   return 1;
 return 0;
}

/*
 * decode(string[, encoding]) -> unicode or str
 *
 * Converts an EUC-GB2312-80 byte string to Unicode using the "mapping"
 * object of the module encodings.chinesecn.euc_gb23122utf.
 *
 * The optional encoding (case-insensitive) selects the result form:
 * "utf-8" and "utf-16" return an encoded byte string; "unicode", an omitted
 * value, or any other name return a unicode object.
 *
 * Returns a new reference, or NULL with an exception set:
 *   - UnicodeError when the input contains a byte sequence that is neither
 *     a single byte below 127 nor a valid double-byte pair,
 *   - whatever the mapping lookup raises (KeyError for a dict) when a
 *     double-byte pair has no entry,
 *   - TypeError when a mapping entry is not a non-empty unicode object.
 */
static PyObject *decode(PyObject *self, PyObject *args)
{
 const char *input;
 const char *encoding = NULL;
 const unsigned char *src;
 PyObject *module = NULL;
 PyObject *table = NULL;
 PyObject *unicodeStr = NULL;
 PyObject *result = NULL;
 PyObject *key, *value;
 Py_UNICODE *p;
 char *lowered;
 long i, len, number = 0L;
 int width;

 if (!PyArg_ParseTuple(args, "s|s", &input, &encoding))
   return NULL;
 src = (const unsigned char *) input;
 len = (long) strlen(input);

 module = PyImport_ImportModule("encodings.chinesecn.euc_gb23122utf");
 if (module == NULL)
   goto done;
 table = PyObject_GetAttrString(module, "mapping");
 if (table == NULL)
   goto done;

 /* Pass 1: validate the input and count the resulting characters. */
 i = 0L;
 while (i < len) {
   width = decodeCharWidth(src + i);
   if (width == 0) {
     /* Neither EUC-GB2312-80 code nor ASCII code. */
     PyErr_SetString(PyExc_UnicodeError,
                     "invalid EUC-GB2312-80 byte sequence");
     goto done;
   }
   number += 1L;
   i += width;
 }

 /* A NULL buffer asks for an uninitialised string of the given length. */
 unicodeStr = PyUnicode_FromUnicode(NULL, number);
 if (unicodeStr == NULL)
   goto done;
 p = PyUnicode_AS_UNICODE(unicodeStr);

 /* Pass 2: translate.  Every width is non-zero because pass 1 succeeded. */
 i = 0L;
 while (i < len) {
   if (decodeCharWidth(src + i) == 2) {
     key = PyString_FromStringAndSize((const char *) (src + i), 2);
     if (key == NULL)
       goto done;
     value = PyObject_GetItem(table, key);
     Py_DECREF(key);
     if (value == NULL)
       goto done; /* the lookup's own exception propagates */
     if (!PyUnicode_Check(value) || PyUnicode_GET_SIZE(value) < 1) {
       Py_DECREF(value);
       PyErr_SetString(PyExc_TypeError,
                       "mapping values must be non-empty unicode objects");
       goto done;
     }
     *p++ = *PyUnicode_AS_UNICODE(value);
     Py_DECREF(value);
     i += 2L;
   }
   else {
     /* A single byte below 127 maps to the same code point. */
     *p++ = (Py_UNICODE) src[i];
     i += 1L;
   }
 }

 /* strlwr() returns a malloc()ed copy (NULL for NULL); it is freed below. */
 lowered = strlwr((char *) encoding);
 if (lowered != NULL && strcmp(lowered, "utf-8") == 0)
   result = PyUnicode_AsUTF8String(unicodeStr);
 else if (lowered != NULL && strcmp(lowered, "utf-16") == 0)
   result = PyUnicode_AsUTF16String(unicodeStr);
 else {
   /* "unicode", no encoding, or an unknown name: hand over the object. */
   result = unicodeStr;
   unicodeStr = NULL;
 }
 free(lowered);

done:
 Py_XDECREF(unicodeStr);
 Py_XDECREF(table);
 Py_XDECREF(module);
 return result;
}

/*
 * Fetch value[position], which must be a non-empty str, and store its first
 * byte in *out.  Returns 0 on success, -1 with an exception set on failure.
 */
static int encodeFetchByte(PyObject *value, long position, unsigned char *out)
{
 PyObject *index, *item;

 index = PyInt_FromLong(position);
 if (index == NULL)
   return -1;
 item = PyObject_GetItem(value, index);
 Py_DECREF(index);
 if (item == NULL)
   return -1;
 if (!PyString_Check(item) || PyString_GET_SIZE(item) < 1) {
   Py_DECREF(item);
   PyErr_SetString(PyExc_TypeError,
                   "mapping values must hold two non-empty byte strings");
   return -1;
 }
 *out = (unsigned char) *PyString_AS_STRING(item);
 Py_DECREF(item);
 return 0;
}

/*
 * encode(text[, encoding]) -> str
 *
 * Converts text to an EUC-GB2312-80 byte string using the "mapping" object
 * of the module encodings.chinesecn.utf2euc_gb2312.
 *
 * The optional encoding (case-insensitive) describes the input: "utf-8" and
 * "utf-16" expect a byte string in that encoding; "unicode", an omitted
 * value, or any other name expect a unicode object.
 *
 * Each character found in the mapping contributes the two bytes
 * mapping[ch][0] and mapping[ch][1].  A character missing from the mapping
 * is copied as one byte when it is below 127; otherwise the lookup's
 * exception propagates.
 *
 * Returns a new reference, or NULL with an exception set (TypeError when
 * the input object does not match the declared encoding).
 */
static PyObject *encode(PyObject *self, PyObject *args)
{
 PyObject *unicodeStr;
 const char *encoding = NULL;
 char *lowered;
 PyObject *module = NULL;
 PyObject *table = NULL;
 PyObject *text = NULL;
 PyObject *eucGb2312Str = NULL;
 PyObject *key, *value;
 Py_UNICODE *chars, ch;
 unsigned char *s;
 long number, capacity, used = 0L, i;
 int failed = 1;

 if (!PyArg_ParseTuple(args, "O|s", &unicodeStr, &encoding))
   return NULL;

 /* Normalise the input to an owned unicode object in `text`.
    strlwr() returns a malloc()ed copy (NULL for NULL), freed below. */
 lowered = strlwr((char *) encoding);
 if (lowered != NULL && strcmp(lowered, "utf-8") == 0) {
   if (!PyString_Check(unicodeStr))
     PyErr_SetString(PyExc_TypeError, "utf-8 input must be a byte string");
   else
     /* Use the real length so embedded NUL bytes do not truncate. */
     text = PyUnicode_DecodeUTF8(PyString_AS_STRING(unicodeStr),
                                 PyString_GET_SIZE(unicodeStr), "strict");
 }
 else if (lowered != NULL && strcmp(lowered, "utf-16") == 0) {
   if (!PyString_Check(unicodeStr))
     PyErr_SetString(PyExc_TypeError, "utf-16 input must be a byte string");
   else
     text = PyUnicode_DecodeUTF16(PyString_AS_STRING(unicodeStr),
                                  PyString_GET_SIZE(unicodeStr),
                                  "strict", NULL);
 }
 else {
   if (!PyUnicode_Check(unicodeStr))
     PyErr_SetString(PyExc_TypeError, "input must be a unicode object");
   else {
     Py_INCREF(unicodeStr);
     text = unicodeStr;
   }
 }
 free(lowered);
 if (text == NULL)
   goto done;

 module = PyImport_ImportModule("encodings.chinesecn.utf2euc_gb2312");
 if (module == NULL)
   goto done;
 table = PyObject_GetAttrString(module, "mapping");
 if (table == NULL)
   goto done;

 number = (long) PyUnicode_GET_SIZE(text);
 chars = PyUnicode_AS_UNICODE(text);

 /* Reserve the worst case (two bytes per character) and shrink afterwards,
    so the buffer can never be too small whatever the mapping contains. */
 capacity = 2L * number;
 eucGb2312Str = PyString_FromStringAndSize(NULL, capacity);
 if (eucGb2312Str == NULL)
   goto done;
 s = (unsigned char *) PyString_AS_STRING(eucGb2312Str);

 for (i = 0L; i < number; i++) {
   ch = chars[i];
   key = PyUnicode_FromUnicode(&ch, 1);
   if (key == NULL)
     goto done;
   value = PyObject_GetItem(table, key);
   Py_DECREF(key);
   if (value == NULL) {
     if (ch < 127 && PyErr_ExceptionMatches(PyExc_LookupError)) {
       /* Unmapped ASCII passes through; drop the pending lookup error. */
       PyErr_Clear();
       s[used++] = (unsigned char) ch;
       continue;
     }
     goto done; /* Neither EUC-GB2312-80 code nor ASCII code. */
   }
   if (encodeFetchByte(value, 0L, &s[used]) < 0 ||
       encodeFetchByte(value, 1L, &s[used + 1]) < 0) {
     Py_DECREF(value);
     goto done;
   }
   Py_DECREF(value);
   used += 2L;
 }

 /* Skip the resize when nothing changes: for empty input the string may be
    a shared object that must not be resized.  On failure _PyString_Resize
    releases the string and sets eucGb2312Str to NULL. */
 if (used != capacity && _PyString_Resize(&eucGb2312Str, used) < 0)
   goto done;
 failed = 0;

done:
 if (failed) {
   Py_XDECREF(eucGb2312Str);
   eucGb2312Str = NULL;
 }
 Py_XDECREF(text);
 Py_XDECREF(table);
 Py_XDECREF(module);
 return eucGb2312Str;
}

/* Method table of the gb2312 module.  Both entry points take positional
 * arguments only and carry no docstring; the all-zero entry ends the table. */
static PyMethodDef Gb2312Methods[] = {
       {"decode", decode, METH_VARARGS, NULL},
       {"encode", encode, METH_VARARGS, NULL},
       {NULL, NULL, 0, NULL}
};

void initgb2312(void)
{
 PyObject *m;
 m = (PyObject*) Py_InitModule("gb2312", Gb2312Methods);
}

/*
 * Return a malloc()ed, NUL-terminated copy of str with every character
 * converted by tolower().  str itself is not modified.
 *
 * Returns NULL when str is NULL.  The caller owns the result and releases
 * it with free().  If the allocation fails, a message is written to stderr
 * and the process exits with a failure status.
 */
char *strlwr(char *str)
{
 size_t len, i;
 char *chstr;

 if (str == NULL)
   return NULL;
 len = strlen(str);
 chstr = malloc(len + 1); /* +1 for the terminating NUL */
 if (chstr == NULL) {
   fprintf(stderr, "Out of Memory\n");
   exit(EXIT_FAILURE);
 }
 for (i = 0; i < len; i++)
   /* <ctype.h> functions require a value representable as unsigned char. */
   chstr[i] = (char) tolower((unsigned char) str[i]);
 chstr[len] = '\0';
 return chstr;
}

/*
 * Return a malloc()ed, NUL-terminated copy of str with every character
 * converted by toupper().  str itself is not modified.
 *
 * Returns NULL when str is NULL.  The caller owns the result and releases
 * it with free().  If the allocation fails, a message is written to stderr
 * and the process exits with a failure status.
 */
char *strupr(char *str)
{
 size_t len, i;
 char *chstr;

 if (str == NULL)
   return NULL;
 len = strlen(str);
 chstr = malloc(len + 1); /* +1 for the terminating NUL */
 if (chstr == NULL) {
   fprintf(stderr, "Out of Memory\n");
   exit(EXIT_FAILURE);
 }
 for (i = 0; i < len; i++)
   /* <ctype.h> functions require a value representable as unsigned char. */
   chstr[i] = (char) toupper((unsigned char) str[i]);
 chstr[len] = '\0';
 return chstr;
}
