resolution for bug 1010313

I've resolved bug number 1010313 (it was necessary for me because I use the
psqlodbc driver on Sun Solaris SPARC with UTF-8 characters).

To resolve this bug I've added is_big_endian(), which determines whether the
platform uses big-endian or little-endian byte order.
This function is used because there is no single "standard" preprocessor
define that works across different compilers and operating systems.

Additionally, because I don't want to calculate the platform endianness on
every call to the ucs2_to_utf8 function, I've used two static variables: the
first, big_endian, stores the endianness status, and the second,
is_endian_calc, records whether the endianness has already been calculated.

And finally, in ucs2_to_utf8 I've added byte swapping for big-endian
platforms.

Below is the source code that I've implemented
(I've sent the complete source code of the ucs2_to_utf8 function).

/*
 * Report the byte order of the host, determined at run time.
 *
 * The value 1 is stored in a long and the last byte of its in-memory
 * representation is inspected: that byte holds the least significant
 * part of the value only on a big-endian machine.
 *
 * Returns 1 on a big-endian host, 0 otherwise.
 */
int is_big_endian(void)
{
    union { long l; char c[sizeof (long)]; } u;

    u.l = 1;
    return (u.c[sizeof (long) - 1] == 1);
}

/*
 * Convert a UCS-2 / UTF-16 string into a newly allocated, NUL-terminated
 * UTF-8 string.
 *
 * ucs2str           Input string. When NULL, NULL is returned and *olen
 *                   (if olen is non-NULL) is set to SQL_NULL_DATA.
 * ilen              Number of SQLWCHAR units to convert, or SQL_NTS to have
 *                   the length measured with ucs2strlen(). Conversion also
 *                   stops at the first zero unit.
 * olen              Optional output: length in bytes of the result, not
 *                   counting the terminating NUL. Not written when the
 *                   allocation fails.
 * lower_identifier  When true, ASCII characters are passed through
 *                   tolower() while being copied.
 *
 * Returns the malloc()'ed UTF-8 buffer (the caller must free() it), or NULL
 * when ucs2str is NULL or the allocation fails.
 *
 * Each multi-byte sequence is first assembled in an integer whose least
 * significant byte is the first UTF-8 byte. A little-endian host can copy
 * that integer to the output as-is; a big-endian host has to emit its bytes
 * in reverse memory order, which is what the big_endian branches do.
 */
char *ucs2_to_utf8(const SQLWCHAR *ucs2str, SQLLEN ilen, SQLLEN *olen, BOOL lower_identifier)
{
    char * utf8str;
    /*mylog("ucs2_to_utf8 %p ilen=%d ", ucs2str, ilen);*/

    /*
     * is this a bigendian machine ?
     * The answer is computed on the first call and cached afterwards.
     */
    static int is_endian_calc = 0;
    static int big_endian = 0;
    if (!is_endian_calc)
    {
        big_endian = is_big_endian();
        is_endian_calc = 1;
    }

    if (!ucs2str)
    {
        /* olen is optional (see the end of the function), so guard it here too */
        if (olen)
            *olen = SQL_NULL_DATA;
        return NULL;
    }
    if (SQL_NTS == ilen)
        ilen = ucs2strlen(ucs2str);
    /*mylog(" newlen=%d", ilen);*/
    /* room for ilen * 4 output bytes plus the terminating NUL */
    utf8str = (char *) malloc(ilen * 4 + 1);
    if (utf8str)
    {
        int i, len = 0;
        union { UInt2 i; char c[sizeof (UInt2)]; } byte2code;
        union { Int4 i; char c[sizeof (Int4)]; } byte4code, surrd1, surrd2;
        const SQLWCHAR *wstr;

        for (i = 0, wstr = ucs2str; i < ilen; i++, wstr++)
        {
            if (!*wstr)
                break;
            else if (0 == (*wstr & 0xffffff80)) /* ASCII */
            {
                if (lower_identifier)
                    utf8str[len++] = (char) tolower(*wstr);
                else
                    utf8str[len++] = (char) *wstr;
            }
            else if ((*wstr & byte3check) == 0)
            {
                /* two-byte sequence */
                byte2code.i = byte2_base |
                    ((byte2_mask1 & *wstr) >> 6) |
                    ((byte2_mask2 & *wstr) << 8);
                if (big_endian)
                {
                    utf8str[len] = byte2code.c[1];
                    utf8str[len + 1] = byte2code.c[0];
                }
                else
                {
                    memcpy(utf8str + len, (char *) &byte2code.i, sizeof(byte2code.i));
                }
                len += sizeof(byte2code.i);
            }
            /* surrogate pair check for non ucs-2 code */
            else if (surrog1_bits == (*wstr & surrog_check))
            {
                /*
                 * The second half of the pair must lie inside the input;
                 * stop instead of reading past the end of the buffer.
                 */
                if (i + 1 >= ilen)
                    break;
                surrd1.i = (*wstr & ~surrog_check) + surrogate_adjust;
                wstr++;
                i++;
                surrd2.i = (*wstr & ~surrog_check);
                byte4code.i = byte4_base |
                    ((byte4_sr1_mask1 & surrd1.i) >> 8) |
                    ((byte4_sr1_mask2 & surrd1.i) << 6) |
                    ((byte4_sr1_mask3 & surrd1.i) << 20) |
                    ((byte4_sr2_mask1 & surrd2.i) << 10) |
                    ((byte4_sr2_mask2 & surrd2.i) << 24);
                if (big_endian)
                {
                    /* reverse the four bytes of byte4code (not byte2code) */
                    utf8str[len] = byte4code.c[3];
                    utf8str[len + 1] = byte4code.c[2];
                    utf8str[len + 2] = byte4code.c[1];
                    utf8str[len + 3] = byte4code.c[0];
                }
                else
                {
                    memcpy(utf8str + len, (char *) &byte4code.i, sizeof(byte4code.i));
                }
                len += sizeof(byte4code.i);
            }
            else
            {
                /* three-byte sequence, held in the low three bytes of byte4code */
                byte4code.i = byte3_base |
                    ((byte3_mask1 & *wstr) >> 12) |
                    ((byte3_mask2 & *wstr) << 2) |
                    ((byte3_mask3 & *wstr) << 16);
                if (big_endian)
                {
                    /* the low three bytes sit at c[3], c[2], c[1] on this host */
                    utf8str[len] = byte4code.c[3];
                    utf8str[len + 1] = byte4code.c[2];
                    utf8str[len + 2] = byte4code.c[1];
                }
                else
                {
                    memcpy(utf8str + len, (char *) &byte4code.i, 3);
                }
                len += 3;
            }
        }
        utf8str[len] = '\0';
        if (olen)
            *olen = len;
    }
    /*mylog(" olen=%d %s\n", *olen, utf8str ? utf8str : "");*/
    return utf8str;
}

------------------------------------------------------------ ----------
W kosciele tez zdarzaja sie wpadki!
Smieszny filmik >>> http://link.interia.pl/f1e61


--
Sent via pgsql-odbc mailing list (pgsql-odbc [at] postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-odbc
Marcin Ligorowski [ Di, 29 Juli 2008 09:36 ] [ ID #1962380 ]

Re: resolution for bug 1010313

On Tue, Jul 29, 2008 at 2:36 AM, Marcin Ligorowski <ligo [at] interia.pl> wrote:

> Below is source code that I've implemented
> (I've send all ucs2_to_utf8 function source code)
>

Is it possible to attach your changes as a patch? In-line posting of
some code changes (and in non-patch format) really is not an ideal way
to post changes.

Thanks,
Adam

--
Sent via pgsql-odbc mailing list (pgsql-odbc [at] postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-odbc
Adam M [ Fr, 08 August 2008 23:03 ] [ ID #1963504 ]

Re: resolution for bug 1010313

Below are the changes in patch format, obtained using the diff utility.

Marcin

--- win_unicode.c.org Sat Sep 1 01:40:10 2007
+++ win_unicode.c Fri Jul 25 12:52:06 2008
@@ -54,6 +54,13 @@

+int is_big_endian()
+{
+ union { long l; char c[sizeof (long)]; } u;
+ u.l = 1;
+ return (u.c[sizeof (long) - 1] == 1);
+}
+
SQLULEN ucs2strlen(const SQLWCHAR *ucs2str)
{
SQLULEN len;
@@ -66,6 +73,17 @@
char * utf8str;

+ static int is_endian_calc = 0;
+ static int big_endian = 0;
+ if(!is_endian_calc)
+ {
+ big_endian = is_big_endian();
+ is_endian_calc = 1;
+ }
+
if (!ucs2str)
{
*olen = SQL_NULL_DATA;
@@ -78,8 +96,8 @@
if (utf8str)
{
int i, len = 0;
- UInt2 byte2code;
- Int4 byte4code, surrd1, surrd2;
+ union { UInt2 i; char c[sizeof (UInt2)]; } byte2code;
+ union { Int4 i; char c[sizeof (Int4)]; } byte4code, surrd1, surrd2;
const SQLWCHAR *wstr;

for (i = 0, wstr = ucs2str; i < ilen; i++, wstr++)
@@ -95,35 +113,62 @@
}
else if ((*wstr & byte3check) == 0)
{
- byte2code = byte2_base |
+ byte2code.i = byte2_base |
((byte2_mask1 & *wstr) >> 6) |
((byte2_mask2 & *wstr) << 8);
- memcpy(utf8str + len, (char *) &byte2code, sizeof(byte2code));
- len += sizeof(byte2code);
+ if(big_endian)
+ {
+ memcpy(utf8str + len, (char *) &byte2code.c[1], 1);
+ memcpy(utf8str + len + 1, (char *) &byte2code.c[0], 1);
}
+ else
+ {
+ memcpy(utf8str + len, (char *) &byte2code.i,
sizeof(byte2code.i));
+ }
+ len += sizeof(byte2code.i);
+ }
else if (surrog1_bits == (*wstr & surrog_check))
{
- surrd1 = (*wstr & ~surrog_check) + surrogate_adjust;
+ surrd1.i = (*wstr & ~surrog_check) + surrogate_adjust;
wstr++;
i++;
- surrd2 = (*wstr & ~surrog_check);
- byte4code = byte4_base |
- ((byte4_sr1_mask1 & surrd1) >> 8) |
- ((byte4_sr1_mask2 & surrd1) << 6) |
- ((byte4_sr1_mask3 & surrd1) << 20) |
- ((byte4_sr2_mask1 & surrd2) << 10) |
- ((byte4_sr2_mask2 & surrd2) << 24);
- memcpy(utf8str + len, (char *) &byte4code, sizeof(byte4code));
- len += sizeof(byte4code);
+ surrd2.i = (*wstr & ~surrog_check);
+ byte4code.i = byte4_base |
+ ((byte4_sr1_mask1 & surrd1.i) >> 8) |
+ ((byte4_sr1_mask2 & surrd1.i) << 6) |
+ ((byte4_sr1_mask3 & surrd1.i) << 20) |
+ ((byte4_sr2_mask1 & surrd2.i) << 10) |
+ ((byte4_sr2_mask2 & surrd2.i) << 24);
+ if(big_endian)
+ {
+ memcpy(utf8str + len, (char *) &byte2code.c[3], 1);
+ memcpy(utf8str + len + 1, (char *) &byte2code.c[2], 1);
+ memcpy(utf8str + len + 2, (char *) &byte2code.c[1], 1);
+ memcpy(utf8str + len + 3, (char *) &byte2code.c[0], 1);
}
else
{
- byte4code = byte3_base |
+ memcpy(utf8str + len, (char *) &byte4code.i,
sizeof(byte4code.i));
+ }
+ len += sizeof(byte4code.i);
+ }
+ else
+ {
+ byte4code.i = byte3_base |
((byte3_mask1 & *wstr) >> 12) |
((byte3_mask2 & *wstr) << 2) |
((byte3_mask3 & *wstr) << 16);
- memcpy(utf8str + len, (char *) &byte4code, 3);
+ if(big_endian)
+ {
+ memcpy(utf8str + len, (char *) &byte2code.c[3], 1);
+ memcpy(utf8str + len + 1, (char *) &byte2code.c[2], 1);
+ memcpy(utf8str + len + 2, (char *) &byte2code.c[1], 1);
+ }
+ else
+ {
+ memcpy(utf8str + len, (char *) &byte4code.i, 3);
+ }
len += 3;
}
}

Adam M pisze:
> On Tue, Jul 29, 2008 at 2:36 AM, Marcin Ligorowski <ligo [at] interia.pl> wrote:
>
>> Below is source code that I've implemented
>> (I've send all ucs2_to_utf8 function source code)
>>
>
> Is it possible to attach your changes as a patch? In-line posting of
> some code changes (and in non-patch format) really is not an ideal way
> to post changes.
>
> Thanks,
> Adam
>

------------------------------------------------------------ ----------
Igrzyska z nagrodami! Kliknij>>>> http://link.interia.pl/f1edb




--
Sent via pgsql-odbc mailing list (pgsql-odbc [at] postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-odbc
Marcin Ligorowski [ Di, 19 August 2008 20:35 ] [ ID #1964575 ]
Datenbanken » gmane.comp.db.postgresql.odbc » resolution for bug 1010313

Vorheriges Thema: Text field truncated using ADO/ODBC
Nächstes Thema: time with time zone sql type?