Changeset 156
- Timestamp:
- 12/29/02 18:17:30 (6 years ago)
- Files:
-
- trunk/configure.ac (modified) (3 diffs)
- trunk/src/Makefile.am (modified) (1 diff)
- trunk/src/enc_test.out (added)
- trunk/src/enc_test.txt (added)
- trunk/src/enc_uni2win.h (added)
- trunk/src/enc_win2uni.h (added)
- trunk/src/encoding.c (modified) (3 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/configure.ac
r154 r156
@@ -16,5 +16,4 @@
 # Checks for libraries.
 AC_CHECK_LIB([gadu], [gg_change_info],[],AC_MSG_ERROR("You need libgadu (from ekg>=20020807 package) to compile this."))
-AC_CHECK_LIB([iconv], [iconv_open])
 
 AC_ARG_WITH(pthread,
@@ -78,5 +77,5 @@
 AC_HEADER_STDC
 AC_HEADER_SYS_WAIT
-AC_CHECK_HEADERS([stdlib.h string.h iconv.h])
+AC_CHECK_HEADERS([stdlib.h string.h])
 AC_CHECK_HEADERS([sys/socket.h netdb.h netinet/in.h])
 AC_CHECK_HEADERS([sys/time.h unistd.h errno.h fcntl.h])
@@ -96,5 +95,4 @@
 AC_CHECK_FUNCS([select socket],[],AC_MSG_ERROR("Some functions needed are missing"))
 AC_CHECK_FUNCS([memset strchr],[],AC_MSG_ERROR("Some functions needed are missing"))
-AC_CHECK_FUNCS([iconv_open],[],AC_MSG_ERROR("Some functions needed are missing"))
 AC_CHECK_LIB([gadu], [gg_event_free],[],AC_MSG_ERROR("You need libgadu>=20020807."))
 
trunk/src/Makefile.am
r139 r156
@@ -37,4 +37,11 @@
 users.h
 
+check_PROGRAMS = encodingtest
+encodingtest_SOURCES = \
+	encoding.c \
+	encoding.h
+encodingtest_CFLAGS=-DENCODINGTEST
+encodingtest_LDADD=$(GLIB_LIBS)
+
 INCLUDES=$(GLIB_CFLAGS) -I$(top_srcdir)/libxode/include
 
trunk/src/encoding.c
r152 r156
@@ -1,3 +1,3 @@
-/* $Id: encoding.c,v 1.10 2002/12/25 11:03:48 jajcus Exp $ */
+/* $Id: encoding.c,v 1.11 2002/12/29 17:17:30 jajcus Exp $ */
 
 /*
@@ -19,25 +19,15 @@
 
 #include "ggtrans.h"
-#include <iconv.h>
 #include <errno.h>
 #include <assert.h>
 #include "encoding.h"
+#include "enc_win2uni.h"
+#include "enc_uni2win.h"
 
-#define ENCODING "windows-1250"
-
-static iconv_t to_utf8_c;
-static iconv_t from_utf8_c;
-static char *buf;
+static unsigned char *buf;
 static int buf_len;
-
-
+
 int encoding_init(){
 
-	to_utf8_c=iconv_open("utf-8",ENCODING);
-	if (to_utf8_c==(iconv_t)-1)
-		g_error("Couldn't open 'to Unicode' converter (%s)",g_strerror(errno));
-	from_utf8_c=iconv_open(ENCODING,"utf-8");
-	if (from_utf8_c==(iconv_t)-1)
-		g_error("Couldn't open 'from Unicode' converter (%s)",g_strerror(errno));
 	buf_len=16;
 	buf=g_new(char,buf_len);
@@ -47,66 +37,148 @@
 void encoding_done(){
 
-	iconv_close(to_utf8_c);
-	iconv_close(from_utf8_c);
 	g_free(buf);
 }
 
-static char *convert(iconv_t conv,const char *str){
-	char *inbuf;
-	size_t inbytesleft;
-	char *outbuf;
-	char *oldbuf;
-	size_t outbytesleft;
-	int r;
+char *to_utf8(const char *str){
+	int o=0;
+	int i;
+	unsigned char c;
+	unsigned u;
 
 	if (str==NULL) return NULL;
-	*buf=0;
-	inbuf=(char *)str;
-	inbytesleft=strlen(str);
-	outbuf=buf;
-	outbytesleft=buf_len-1;
-	iconv(conv,NULL,&inbytesleft,&outbuf,&outbytesleft);
-	while(inbytesleft>0){
-		r=iconv(conv,&inbuf,&inbytesleft,&outbuf,&outbytesleft);
-		if (r>=0){
-			*outbuf=0;
-			break;
+	if (buf_len<(3*strlen(str)+1)){
+		buf_len=3*strlen(str)+1; /* this should always be enough */
+		buf=(char *)g_realloc(buf,buf_len);
+		assert(buf!=NULL);
+	}
+	for(i=0;str[i];i++){
+		c=(unsigned char)str[i];
+		if (c<128){
+			buf[o++]=c;
+			continue;
 		}
-		switch(errno){
-			case EILSEQ:
-				if (!*inbuf) break;
-				inbuf++;
-				*(outbuf++)='?';
-				outbytesleft--;
-				inbytesleft--;
-				if (outbytesleft>0) break;
-			case E2BIG:
-				buf_len+=1024;
-				oldbuf=buf;
-				buf=(char *)g_realloc(oldbuf,buf_len);
-				assert(buf!=NULL);
-				outbytesleft+=1024;
-				outbuf=buf+(outbuf-oldbuf);
-				break;
-			case EINVAL:
-				inbytesleft=0;
-				break;
-			default:
-				*buf=0;
-				inbytesleft=0;
-				break;
+		u=win1250_to_unicode[c-128];
+		if (u==0||u>0x10000){ /* we don't need character > U+0x10000 */
+			buf[o++]='\xef';
+			buf[o++]='\xbf';
+			buf[o++]='\xbd';
+		}
+		else if (u<0x800){
+			buf[o++]=0xc0|(u>>6);
+			buf[o++]=0x80|(u&0x3f);
+		}
+		else {
+			buf[o++]=0xe0|(u>>12);
+			buf[o++]=0x80|((u>>6)&0x3f);
+			buf[o++]=0x80|(u&0x3f);
 		}
 	}
+	buf[o]=0;
+	return (char *)buf;
+}
+
+char *from_utf8(const char *str){
+	unsigned char b,c;
+	unsigned u;
+	int o=0;
+	int i;
+
+	if (str==NULL) return NULL;
+	if (buf_len<(strlen(str)+1)){
+		buf_len=strlen(str)+1; /* this should always be enough */
+		buf=(char *)g_realloc(buf,buf_len);
+		assert(buf!=NULL);
+	}
+	for(i=0;str[i];i++){
+		b=(unsigned char)str[i];
+		if ((b&0x80)==0) { /* ASCII */
+			buf[o++]=b;
+			continue;
+		}
+		if ((b&0xc0)==0x80) { /* middle of UTF-8 char */
+			continue;
+		}
+		if ((b&0xe0)==0xc0) {
+			u=b&0x1f;
+			i++;
+			b=(unsigned char)str[i];
+			if (b==0){
+				buf[o++]='?';
+				break;
+			}
+			if ((b&0xc0)!=0x80){
+				buf[o++]='?';
+				continue;
+			}
+			u=(u<<6)|(b&0x3f);
+		}
+		else if ((b&0xf0)==0xe0) {
+			u=b&0x0f;
+			b=(unsigned char)str[++i];
+			if (b==0){
+				buf[o++]='?';
+				break;
+			}
+			if ((b&0xc0)!=0x80){
+				buf[o++]='?';
+				continue;
+			}
+			u=(u<<6)|(b&0x3f);
+			b=(unsigned char)str[++i];
+			if (b==0){
+				buf[o++]='?';
+				break;
+			}
+			if ((b&0xc0)!=0x80){
+				buf[o++]='?';
+				continue;
+			}
+			u=(u<<6)|(b&0x3f);
+		}
+		else{
+			buf[o++]='?';
+			continue;
+		}
+		if (u<0x00a0)
+			buf[o++]='?';
+		else if (u<0x0180)
+			buf[o++]=unicode_to_win1250_a0_17f[u-0x00a0];
+		else if (u==0x02c7)
+			buf[o++]=0xa1;
+		else if (u<0x02d8)
+			buf[o++]='?';
+		else if (u<0x02de)
+			buf[o++]=unicode_to_win1250_2d8_2dd[u-0x02d8];
+		else if (u<0x2013)
+			buf[o++]='?';
+		else if (u<0x203b)
+			buf[o++]=unicode_to_win1250_2013_203a[u-0x2013];
+		else if (u==0x20ac)
+			buf[o++]=0x80;
+		else if (u==0x2122)
+			buf[o++]=0x99;
+		else
+			buf[o++]='?';
+	}
+	buf[o]=0;
 	return buf;
 }
 
-char *to_utf8(const char *str){
+#ifdef ENCODINGTEST
+#include <stdio.h>
 
-	return convert(to_utf8_c,str);
+int main(int argc,char *argv[]){
+	char buf[1024],*p;
+
+	encoding_init();
+	while(1){
+		p=fgets(buf,1024,stdin);
+		if (p==NULL || buf[0]=='\n') break;
+		printf("To UTF8: %s",to_utf8(buf));
+		printf("From UTF8: %s",from_utf8(buf));
+	}
+	encoding_done();
+	return 0;
 }
 
-char *from_utf8(const char *str){
-
-	return convert(from_utf8_c,str);
-}
-
+#endif
