2 * msvcrt.dll mbcs functions
4 * Copyright 1999 Alexandre Julliard
5 * Copyright 2000 Jon Griffiths
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 * Not currently binary compatible with win32. MSVCRT_mbctype must be
23 * populated correctly and the ismb* functions should reference it.
27 #include "wine/unicode.h"
28 #include "wine/debug.h"
30 WINE_DEFAULT_DEBUG_CHANNEL(msvcrt);
/* Multibyte character-type table handed out by __p__mbctype(); 257 entries.
 * NOTE(review): nothing visible in this file populates it (see the note in
 * the file header). */
32 unsigned char MSVCRT_mbctype[257];
/* Starts at 1. The routines below take their multibyte code paths only when
 * this value is greater than 1; exposed through __p___mb_cur_max(). */
33 int MSVCRT___mb_cur_max = 1;
/* Convert a multibyte character value to a wide character by calling
 * MultiByteToWideChar() with the msvcrt_current_lc_all_cp code page.
 * Bits 8-15 of ch are stored as the first byte of the conversion buffer
 * (presumably only for two-byte values - TODO confirm). A WARN is logged
 * when the conversion fails. */
35 static MSVCRT_wchar_t msvcrt_mbc_to_wc(unsigned int ch)
45 mbch[0] = (ch >> 8) & 0xff;
49 if (!MultiByteToWideChar(msvcrt_current_lc_all_cp, 0, mbch, n_chars, &chW, 1))
51 WARN("MultiByteToWideChar failed on %x\n", ch);
57 /*********************************************************************
58 * __p__mbctype (MSVCRT.@)
60 unsigned char* __p__mbctype(void)
62 return MSVCRT_mbctype;
65 /*********************************************************************
66 * __p___mb_cur_max(MSVCRT.@)
68 int* __p___mb_cur_max(void)
70 return &MSVCRT___mb_cur_max;
73 /*********************************************************************
76 unsigned int _mbsnextc(const unsigned char* str)
78 if(MSVCRT___mb_cur_max > 1 && MSVCRT_isleadbyte(*str))
79 return *str << 8 | str[1];
80 return *str; /* ASCII CP or SB char */
83 /*********************************************************************
84 * _mbctolower(MSVCRT.@)
 *
 * Lower-case a multibyte character value. Values accepted by
 * MSVCRT_isleadbyte() are not handled yet (a FIXME is logged); all other
 * values go through tolower().
 * NOTE(review): the lead-byte test is applied to the whole value c rather
 * than to its high byte - confirm this is intended.
86 unsigned int _mbctolower(unsigned int c)
88 if (MSVCRT_isleadbyte(c))
90 FIXME("Handle MBC chars\n");
93 return tolower(c); /* ASCII CP or SB char */
96 /*********************************************************************
97 * _mbctoupper(MSVCRT.@)
 *
 * Upper-case a multibyte character value. Values accepted by
 * MSVCRT_isleadbyte() are not handled yet (a FIXME is logged); all other
 * values go through toupper().
 * NOTE(review): the lead-byte test is applied to the whole value c rather
 * than to its high byte - confirm this is intended.
99 unsigned int _mbctoupper(unsigned int c)
101 if (MSVCRT_isleadbyte(c))
103 FIXME("Handle MBC chars\n");
106 return toupper(c); /* ASCII CP or SB char */
109 /*********************************************************************
112 unsigned char* _mbsdec(const unsigned char* start, const unsigned char* cur)
114 if(MSVCRT___mb_cur_max > 1)
115 return (unsigned char *)(_ismbstrail(start,cur-1) ? cur - 2 : cur -1);
117 return (unsigned char *)cur - 1; /* ASCII CP or SB char */
120 /*********************************************************************
123 unsigned char* _mbsinc(const unsigned char* str)
125 if(MSVCRT___mb_cur_max > 1 && MSVCRT_isleadbyte(*str))
126 return (unsigned char*)str + 2; /* MB char */
128 return (unsigned char*)str + 1; /* ASCII CP or SB char */
131 /*********************************************************************
 * Advance a string pointer by num characters. When MSVCRT___mb_cur_max
 * is 1 the result is simply str + num; otherwise str itself is returned,
 * presumably after being stepped character by character - TODO confirm.
134 unsigned char* _mbsninc(const unsigned char* str, MSVCRT_size_t num)
138 if(MSVCRT___mb_cur_max > 1)
142 return (unsigned char*)str;
144 return (unsigned char*)str + num; /* ASCII CP */
/*********************************************************************
 *		_mbclen(MSVCRT.@)
 *
 * Size in bytes of the character at the start of a multibyte string.
 *
 * PARAMS
 *  str [I] String to examine
 *
 * RETURNS
 *  2 if *str is a lead byte, 1 otherwise.
 */
unsigned int _mbclen(const unsigned char* str)
{
  if(MSVCRT_isleadbyte(*str))
    return 2;
  return 1;
}

155 /*********************************************************************
 * Byte length of the multibyte character at str, given size available
 * bytes. For a non-NULL, non-empty str and non-zero size: 1 when
 * MSVCRT___mb_cur_max is 1 or *str is not a lead byte; for a lead byte,
 * 2 when size > 1 and -1 otherwise.
 * NOTE(review): *str is a plain char here and may be negative when handed
 * to MSVCRT_isleadbyte() - confirm. The result for NULL/empty input is
 * presumably 0.
158 int MSVCRT_mblen(const char* str, MSVCRT_size_t size)
160 if (str && *str && size)
162 if(MSVCRT___mb_cur_max == 1)
163 return 1; /* ASCII CP */
165 return !MSVCRT_isleadbyte(*str) ? 1 : (size>1 ? 2 : -1);
170 /*********************************************************************
 * Length of a multibyte string. With MSVCRT___mb_cur_max > 1 the string
 * is walked 2 bytes at a time for lead bytes and 1 byte otherwise, with
 * len as the counter; otherwise strlen() is used.
 * NOTE(review): a lead byte directly before the terminator would make the
 * 2-byte step skip the NUL - confirm the loop guards against this.
173 MSVCRT_size_t _mbslen(const unsigned char* str)
175 if(MSVCRT___mb_cur_max > 1)
177 MSVCRT_size_t len = 0;
180 str += MSVCRT_isleadbyte(*str) ? 2 : 1;
185 return strlen(str); /* ASCII CP */
188 /*********************************************************************
189 * _mbstrlen(MSVCRT.@)
 *
 * Length of a multibyte string given as plain char. With
 * MSVCRT___mb_cur_max > 1 the string is walked 2 bytes at a time for lead
 * bytes and 1 byte otherwise, with len as the counter; otherwise strlen()
 * is used. Character validity is not checked (see the FIXME).
 * NOTE(review): *str is a plain char and may be negative when handed to
 * MSVCRT_isleadbyte() - confirm.
191 MSVCRT_size_t _mbstrlen(const char* str)
193 if(MSVCRT___mb_cur_max > 1)
195 MSVCRT_size_t len = 0;
198 /* FIXME: According to the documentation we are supposed to test for
199 * multi-byte character validity. Whatever that means
201 str += MSVCRT_isleadbyte(*str) ? 2 : 1;
206 return strlen(str); /* ASCII CP */
209 /*********************************************************************
 * Copy one multibyte character from src to dest. When
 * MSVCRT___mb_cur_max > 1 and *src is a lead byte, the byte following it
 * is stored through dest. An ERR is logged, presumably on the other
 * path - TODO confirm.
212 void _mbccpy(unsigned char* dest, const unsigned char* src)
215 if(MSVCRT___mb_cur_max > 1 && MSVCRT_isleadbyte(*src))
216 *dest = *++src; /* MB char */
218 ERR("failure.. is this ok?\n");
221 /*********************************************************************
 * Copy from src to dst, limited by n, then zero-fill dst with whatever
 * count is left in n. ret remembers the original dst, presumably as the
 * return value. With MSVCRT___mb_cur_max > 1 lead bytes are detected
 * while copying; n presumably counts characters there - TODO confirm.
224 unsigned char* _mbsncpy(unsigned char* dst, const unsigned char* src, MSVCRT_size_t n)
226 unsigned char* ret = dst;
229 if(MSVCRT___mb_cur_max > 1)
235 if (MSVCRT_isleadbyte(*src++))
244 if (!(*dst++ = *src++)) break;
247 while (n--) *dst++ = 0;
251 /*********************************************************************
252 * _mbsnbcpy(MSVCRT.@)
 *
 * Copy from src to dst, limited by n, then zero-fill dst with whatever
 * count is left in n. ret remembers the original dst, presumably as the
 * return value. In the multibyte path the main loop runs while more than
 * one unit of n remains; a final single byte is only considered when it
 * is not a lead byte, so a two-byte character is never split.
254 unsigned char* _mbsnbcpy(unsigned char* dst, const unsigned char* src, MSVCRT_size_t n)
256 unsigned char* ret = dst;
259 if(MSVCRT___mb_cur_max > 1)
261 while (*src && (n > 1))
265 if (MSVCRT_isleadbyte(*src++))
271 if (*src && n && !MSVCRT_isleadbyte(*src))
273 /* If the last character is a multi-byte character then
274 * we cannot copy it since we have only one byte left
285 if (!(*dst++ = *src++)) break;
288 while (n--) *dst++ = 0;
292 /*********************************************************************
 * Compare two multibyte strings. With MSVCRT___mb_cur_max > 1 the
 * strings are compared one _mbsnextc() value at a time: -1 or 1 for the
 * first differing character, and -1 or 0 (depending on whether cmp still
 * has data) under a condition presumably meaning str has ended. Equal
 * characters advance both pointers by 2 if the value is above 255, else
 * by 1. Single-byte code pages use strcmp().
295 int _mbscmp(const unsigned char* str, const unsigned char* cmp)
297 if(MSVCRT___mb_cur_max > 1)
299 unsigned int strc, cmpc;
302 return *cmp ? -1 : 0;
305 strc = _mbsnextc(str);
306 cmpc = _mbsnextc(cmp);
308 return strc < cmpc ? -1 : 1;
309 str +=(strc > 255) ? 2 : 1;
310 cmp +=(strc > 255) ? 2 : 1; /* equal, use same increment */
313 return strcmp(str, cmp); /* ASCII CP */
316 /*********************************************************************
317 * _mbsicoll(MSVCRT.@)
318 * FIXME: handle locales.
 *
 * Case-insensitive comparison of two multibyte strings. With
 * MSVCRT___mb_cur_max > 1 each character is fetched with _mbsnextc() and
 * folded with _mbctolower() before comparing; the result is -1, 0 or 1.
 * Single-byte code pages use strcasecmp().
 * NOTE(review): the pointer step is derived from the folded value strc.
320 int _mbsicoll(const unsigned char* str, const unsigned char* cmp)
322 if(MSVCRT___mb_cur_max > 1)
324 unsigned int strc, cmpc;
327 return *cmp ? -1 : 0;
330 strc = _mbctolower(_mbsnextc(str));
331 cmpc = _mbctolower(_mbsnextc(cmp));
333 return strc < cmpc ? -1 : 1;
334 str +=(strc > 255) ? 2 : 1;
335 cmp +=(strc > 255) ? 2 : 1; /* equal, use same increment */
338 return strcasecmp(str, cmp); /* ASCII CP */
342 /*********************************************************************
 * Case-insensitive comparison of two multibyte strings. With
 * MSVCRT___mb_cur_max > 1 each character is fetched with _mbsnextc() and
 * folded with _mbctolower() before comparing; the result is -1, 0 or 1.
 * Single-byte code pages use strcasecmp().
 * NOTE(review): the pointer step is derived from the folded value strc.
345 int _mbsicmp(const unsigned char* str, const unsigned char* cmp)
347 if(MSVCRT___mb_cur_max > 1)
349 unsigned int strc, cmpc;
352 return *cmp ? -1 : 0;
355 strc = _mbctolower(_mbsnextc(str));
356 cmpc = _mbctolower(_mbsnextc(cmp));
358 return strc < cmpc ? -1 : 1;
359 str +=(strc > 255) ? 2 : 1;
360 cmp +=(strc > 255) ? 2 : 1; /* equal, use same increment */
363 return strcasecmp(str, cmp); /* ASCII CP */
366 /*********************************************************************
 * Compare at most len characters of two multibyte strings. With
 * MSVCRT___mb_cur_max > 1 characters are fetched with _mbsnextc(); the
 * first difference yields -1 or 1, and 0 is returned once len characters
 * have matched. inc is 2 for values above 255 and 1 otherwise, presumably
 * the pointer step. Single-byte code pages use strncmp().
369 int _mbsncmp(const unsigned char* str, const unsigned char* cmp, MSVCRT_size_t len)
374 if(MSVCRT___mb_cur_max > 1)
376 unsigned int strc, cmpc;
381 return *cmp ? -1 : 0;
384 strc = _mbsnextc(str);
385 cmpc = _mbsnextc(cmp);
387 return strc < cmpc ? -1 : 1;
388 inc=(strc > 255) ? 2 : 1; /* Equal, use same increment */
392 return 0; /* Matched len chars */
394 return strncmp(str, cmp, len); /* ASCII CP */
397 /*********************************************************************
398 * _mbsnbcmp(MSVCRT.@)
 *
 * Compare two multibyte strings with len as the limit (presumably a
 * byte count - TODO confirm). In the multibyte path a character that
 * starts with a lead byte is read with _mbsnextc() only while at least
 * 2 units of len remain, otherwise it is treated as 0. The first
 * difference yields -1 or 1; 0 means everything within len matched.
 * Single-byte code pages use strncmp().
400 int _mbsnbcmp(const unsigned char* str, const unsigned char* cmp, MSVCRT_size_t len)
404 if(MSVCRT___mb_cur_max > 1)
406 unsigned int strc, cmpc;
411 return *cmp ? -1 : 0;
414 if (MSVCRT_isleadbyte(*str))
416 strc=(len>=2)?_mbsnextc(str):0;
424 if (MSVCRT_isleadbyte(*cmp))
425 cmpc=(len>=2)?_mbsnextc(cmp):0;
429 return strc < cmpc ? -1 : 1;
434 return 0; /* Matched len chars */
436 return strncmp(str,cmp,len);
439 /*********************************************************************
440 * _mbsnicmp(MSVCRT.@)
442 * Compare two multibyte strings case insensitively to 'len' characters.
 *
 * In the multibyte path each character is fetched with _mbsnextc() and
 * folded with _mbctolower(); the first difference yields -1 or 1 and 0
 * is returned once len characters have matched. Single-byte code pages
 * use strncasecmp().
444 int _mbsnicmp(const unsigned char* str, const unsigned char* cmp, MSVCRT_size_t len)
446 /* FIXME: No tolower() for mb strings yet */
447 if(MSVCRT___mb_cur_max > 1)
449 unsigned int strc, cmpc;
453 return *cmp ? -1 : 0;
456 strc = _mbctolower(_mbsnextc(str));
457 cmpc = _mbctolower(_mbsnextc(cmp));
459 return strc < cmpc ? -1 : 1;
460 str +=(strc > 255) ? 2 : 1;
461 cmp +=(strc > 255) ? 2 : 1; /* Equal, use same increment */
463 return 0; /* Matched len chars */
465 return strncasecmp(str, cmp, len); /* ASCII CP */
468 /*********************************************************************
469 * _mbsnbicmp(MSVCRT.@)
 *
 * Case-insensitive comparison of two multibyte strings limited to len
 * bytes. In the multibyte path a character that starts with a lead byte
 * is read with _mbsnextc() only while at least 2 bytes of len remain,
 * otherwise it is treated as 0; both values are folded with
 * _mbctolower() before comparing.
 * NOTE(review): the single-byte fallback calls strncmp(), which is case
 * sensitive, whereas _mbsnicmp falls back to strncasecmp() - this looks
 * inconsistent; confirm and fix.
471 int _mbsnbicmp(const unsigned char* str, const unsigned char* cmp, MSVCRT_size_t len)
475 if(MSVCRT___mb_cur_max > 1)
477 unsigned int strc, cmpc;
482 return *cmp ? -1 : 0;
485 if (MSVCRT_isleadbyte(*str))
487 strc=(len>=2)?_mbsnextc(str):0;
495 if (MSVCRT_isleadbyte(*cmp))
496 cmpc=(len>=2)?_mbsnextc(cmp):0;
499 strc = _mbctolower(strc);
500 cmpc = _mbctolower(cmpc);
502 return strc < cmpc ? -1 : 1;
507 return 0; /* Matched len bytes */
509 return strncmp(str,cmp,len);
512 /*********************************************************************
515 * Find a multibyte character in a multibyte string.
 *
 * With MSVCRT___mb_cur_max > 1 the string is walked one character at a
 * time (2 bytes when the character value c exceeds 255, else 1) and the
 * current position is returned on a match. Single-byte code pages use
 * strchr().
517 unsigned char* _mbschr(const unsigned char* s, unsigned int x)
519 if(MSVCRT___mb_cur_max > 1)
526 return (unsigned char*)s;
529 s += c > 255 ? 2 : 1;
532 return strchr(s, x); /* ASCII CP */
535 /*********************************************************************
 * Search a multibyte string for character x, remembering a matching
 * position in match (initially NULL), presumably so that the last
 * occurrence is returned - TODO confirm. With MSVCRT___mb_cur_max > 1
 * the string is walked 2 bytes for character values above 255, else 1.
538 unsigned char* _mbsrchr(const unsigned char* s, unsigned int x)
540 if(MSVCRT___mb_cur_max > 1)
543 unsigned char* match=NULL;
549 match=(unsigned char*)s;
552 s +=(c > 255) ? 2 : 1;
558 /*********************************************************************
561 * Find and extract tokens from strings
 *
 * The continuation pointer is kept per thread in the mbstok_next field
 * of msvcrt_get_thread_data(). Multibyte path: resume from mbstok_next
 * (presumably when str is NULL), skip leading delimiter characters,
 * return NULL if nothing is left, scan to the next delimiter, zero the
 * delimiter (an extra byte is zeroed when it is a two-byte character)
 * and store the resume position. Single-byte code pages use strtok().
563 unsigned char* _mbstok(unsigned char *str, const unsigned char *delim)
565 thread_data_t *data = msvcrt_get_thread_data();
568 if(MSVCRT___mb_cur_max > 1)
573 if (!(str = data->mbstok_next)) return NULL;
575 while ((c = _mbsnextc(str)) && _mbschr(delim, c)) {
576 str += c > 255 ? 2 : 1;
578 if (!*str) return NULL;
580 while ((c = _mbsnextc(str)) && !_mbschr(delim, c)) {
581 str += c > 255 ? 2 : 1;
585 if (c > 255) *str++ = 0;
587 data->mbstok_next = str;
590 return strtok(str, delim); /* ASCII CP */
593 /*********************************************************************
 * Convert the multibyte character at str (at most n bytes) to a wide
 * character via MultiByteToWideChar(), using a local temporary as the
 * output buffer. The result is the number of source bytes consumed; the
 * n >= 2 / lead byte / non-zero second byte test presumably selects the
 * two-byte case.
 * NOTE(review): the conversion uses CP_ACP whereas msvcrt_mbc_to_wc()
 * uses msvcrt_current_lc_all_cp - confirm which is intended.
596 int MSVCRT_mbtowc(MSVCRT_wchar_t *dst, const char* str, MSVCRT_size_t n)
598 /* temp var needed because MultiByteToWideChar wants non NULL destination */
599 MSVCRT_wchar_t tmpdst = '\0';
603 if(!MultiByteToWideChar(CP_ACP, 0, str, n, &tmpdst, 1))
607 /* return the number of bytes from src that have been used */
610 if(n >= 2 && MSVCRT_isleadbyte(*str) && str[1])
615 /*********************************************************************
616 * _mbbtombc(MSVCRT.@)
618 unsigned int _mbbtombc(unsigned int c)
620 if(MSVCRT___mb_cur_max > 1 &&
621 ((c >= 0x20 && c <=0x7e) ||(c >= 0xa1 && c <= 0xdf)))
623 /* FIXME: I can't get this function to return anything
624 * different from what I pass it...
627 return c; /* ASCII CP or no MB char */
630 /*********************************************************************
631 * _ismbbkana(MSVCRT.@)
 *
 * Katakana byte test. When msvcrt_current_lc_all_cp is 932 the result is
 * whether c lies in 0xa1..0xdf; other code pages presumably yield 0.
633 int _ismbbkana(unsigned int c)
635 /* FIXME: use lc_ctype when supported, not lc_all */
636 if(msvcrt_current_lc_all_cp == 932)
638 /* Japanese/Katakana, CP 932 */
639 return (c >= 0xa1 && c <= 0xdf);
644 /*********************************************************************
645 * _ismbcdigit(MSVCRT.@)
647 int _ismbcdigit(unsigned int ch)
649 MSVCRT_wchar_t wch = msvcrt_mbc_to_wc( ch );
650 return (get_char_typeW( wch ) & C1_DIGIT);
653 /*********************************************************************
654 * _ismbcgraph(MSVCRT.@)
656 int _ismbcgraph(unsigned int ch)
658 MSVCRT_wchar_t wch = msvcrt_mbc_to_wc( ch );
659 return (get_char_typeW( wch ) & (C1_UPPER | C1_LOWER | C1_DIGIT | C1_PUNCT | C1_ALPHA));
662 /*********************************************************************
663 * _ismbcalpha (MSVCRT.@)
665 int _ismbcalpha(unsigned int ch)
667 MSVCRT_wchar_t wch = msvcrt_mbc_to_wc( ch );
668 return (get_char_typeW( wch ) & C1_ALPHA);
671 /*********************************************************************
672 * _ismbclower (MSVCRT.@)
674 int _ismbclower(unsigned int ch)
676 MSVCRT_wchar_t wch = msvcrt_mbc_to_wc( ch );
677 return (get_char_typeW( wch ) & C1_UPPER);
680 /*********************************************************************
681 * _ismbcupper (MSVCRT.@)
683 int _ismbcupper(unsigned int ch)
685 MSVCRT_wchar_t wch = msvcrt_mbc_to_wc( ch );
686 return (get_char_typeW( wch ) & C1_LOWER);
689 /*********************************************************************
690 * _ismbcsymbol(MSVCRT.@)
 *
 * Test whether a multibyte character is a symbol. The character is
 * converted with msvcrt_mbc_to_wc() and its CT_CTYPE3 information is
 * queried with GetStringTypeW(); the result is 1 when C3_SYMBOL is set
 * and 0 otherwise. A WARN is logged when the query fails.
692 int _ismbcsymbol(unsigned int ch)
694 MSVCRT_wchar_t wch = msvcrt_mbc_to_wc( ch );
696 if (!GetStringTypeW(CT_CTYPE3, &wch, 1, &ctype))
698 WARN("GetStringTypeW failed on %x\n", ch);
701 return ((ctype & C3_SYMBOL) != 0);
704 /*********************************************************************
705 * _ismbcalnum (MSVCRT.@)
707 int _ismbcalnum(unsigned int ch)
709 MSVCRT_wchar_t wch = msvcrt_mbc_to_wc( ch );
710 return (get_char_typeW( wch ) & (C1_ALPHA | C1_DIGIT));
713 /*********************************************************************
714 * _ismbcspace (MSVCRT.@)
716 int _ismbcspace(unsigned int ch)
718 MSVCRT_wchar_t wch = msvcrt_mbc_to_wc( ch );
719 return (get_char_typeW( wch ) & C1_SPACE);
722 /*********************************************************************
723 * _ismbcprint (MSVCRT.@)
725 int _ismbcprint(unsigned int ch)
727 MSVCRT_wchar_t wch = msvcrt_mbc_to_wc( ch );
728 return (get_char_typeW( wch ) & (C1_UPPER | C1_LOWER | C1_DIGIT | C1_PUNCT | C1_ALPHA | C1_SPACE));
731 /*********************************************************************
732 * _ismbcpunct(MSVCRT.@)
734 int _ismbcpunct(unsigned int ch)
736 MSVCRT_wchar_t wch = msvcrt_mbc_to_wc( ch );
737 return (get_char_typeW( wch ) & C1_PUNCT);
740 /*********************************************************************
741 * _ismbchira(MSVCRT.@)
 *
 * Hiragana test. When msvcrt_current_lc_all_cp is 932 the result is
 * whether c lies in 0x829f..0x82f1; other code pages presumably yield 0.
743 int _ismbchira(unsigned int c)
745 /* FIXME: use lc_ctype when supported, not lc_all */
746 if(msvcrt_current_lc_all_cp == 932)
748 /* Japanese/Hiragana, CP 932 */
749 return (c >= 0x829f && c <= 0x82f1);
754 /*********************************************************************
755 * _ismbckata(MSVCRT.@)
 *
 * Katakana test. When msvcrt_current_lc_all_cp is 932, some values are
 * delegated to _ismbbkana() (presumably single-byte ones - TODO confirm);
 * otherwise the result is whether c lies in 0x8340..0x8396, excluding
 * 0x837f. Other code pages presumably yield 0.
757 int _ismbckata(unsigned int c)
759 /* FIXME: use lc_ctype when supported, not lc_all */
760 if(msvcrt_current_lc_all_cp == 932)
763 return _ismbbkana(c);
764 /* Japanese/Katakana, CP 932 */
765 return (c >= 0x8340 && c <= 0x8396 && c != 0x837f);
770 /*********************************************************************
771 * _ismbblead(MSVCRT.@)
773 int _ismbblead(unsigned int c)
775 /* FIXME: should reference MSVCRT_mbctype */
776 return MSVCRT___mb_cur_max > 1 && MSVCRT_isleadbyte(c);
/*********************************************************************
 *		_ismbbtrail(MSVCRT.@)
 *
 * Test whether a byte is a multibyte trail byte. Currently every byte
 * that _ismbblead() rejects is reported as a trail byte.
 *
 * PARAMS
 *  c [I] Byte value
 *
 * RETURNS
 *  1 when _ismbblead(c) is zero, 0 otherwise.
 */
int _ismbbtrail(unsigned int c)
{
  /* FIXME: should reference MSVCRT_mbctype */
  if(_ismbblead(c))
    return 0;
  return 1;
}

789 /*********************************************************************
790 * _ismbslead(MSVCRT.@)
 *
 * Test whether the byte at str is a lead byte in the context of the
 * string beginning at start. With MSVCRT___mb_cur_max > 1 the string is
 * walked from start; when the walk lands on str the result is
 * MSVCRT_isleadbyte(*str), otherwise 0.
 * NOTE(review): the walking step tests *str, not *start, to decide
 * between 2 and 1 bytes - this looks wrong; confirm.
792 int _ismbslead(const unsigned char* start, const unsigned char* str)
794 /* Lead bytes can also be trail bytes if caller messed up
795 * iterating through the string...
797 if(MSVCRT___mb_cur_max > 1)
800 start += MSVCRT_isleadbyte(*str) ? 2 : 1;
803 return MSVCRT_isleadbyte(*str);
805 return 0; /* Must have been a trail, we skipped it */
/*********************************************************************
 *		_ismbstrail(MSVCRT.@)
 *
 * Test whether the byte at str is a trail byte in the context of the
 * string beginning at start.
 *
 * PARAMS
 *  start [I] Beginning of the string
 *  str   [I] Position to test
 *
 * RETURNS
 *  1 when str is past start, is not a lead byte and is preceded by a
 *  lead byte; 0 otherwise.
 */
int _ismbstrail(const unsigned char* start, const unsigned char* str)
{
  /* The first byte has no predecessor: it cannot be a trail byte, and
   * looking at str[-1] would read before the string */
  if(str <= start)
    return 0;

  /* Must not be a lead, and must be preceded by one */
  return !_ismbslead(start, str) && MSVCRT_isleadbyte(str[-1]);
}

817 /*********************************************************************
 * Fill a multibyte string with character c. Single-byte code pages and
 * values of c below 256 are handed to _strset(). Otherwise c is reduced
 * to 16 bits and the string is filled while two bytes remain before the
 * terminator; a leftover single byte is replaced by '\0', shortening the
 * string. ret remembers the original str, presumably the return value.
820 unsigned char* _mbsset(unsigned char* str, unsigned int c)
822 unsigned char* ret = str;
824 if(MSVCRT___mb_cur_max == 1 || c < 256)
825 return _strset(str, c); /* ASCII CP or SB char */
827 c &= 0xffff; /* Strip high bits */
829 while(str[0] && str[1])
835 str[0] = '\0'; /* FIXME: OK to shorten? */
840 /*********************************************************************
841 * _mbsnbset(MSVCRT.@)
 *
 * Fill a multibyte string with character c, limited by len. Single-byte
 * code pages and values of c below 256 are handed to _strnset().
 * Otherwise c is reduced to 16 bits and the string is filled while two
 * bytes remain before the terminator and len is above 1; a leftover
 * position is padded with a blank. ret remembers the original str,
 * presumably the return value.
843 unsigned char* _mbsnbset(unsigned char *str, unsigned int c, MSVCRT_size_t len)
845 unsigned char *ret = str;
850 if(MSVCRT___mb_cur_max == 1 || c < 256)
851 return _strnset(str, c, len); /* ASCII CP or SB char */
853 c &= 0xffff; /* Strip high bits */
855 while(str[0] && str[1] && (len > 1))
863 /* as per msdn pad with a blank character */
870 /*********************************************************************
 * Fill a multibyte string with character c, limited by len. Single-byte
 * code pages and values of c below 256 are handed to _strnset().
 * Otherwise c is reduced to 16 bits and the string is filled while two
 * bytes remain before the terminator and len has not run out; a
 * leftover single byte is replaced by '\0', shortening the string. ret
 * remembers the original str, presumably the return value.
873 unsigned char* _mbsnset(unsigned char* str, unsigned int c, MSVCRT_size_t len)
875 unsigned char *ret = str;
880 if(MSVCRT___mb_cur_max == 1 || c < 256)
881 return _strnset(str, c, len); /* ASCII CP or SB char */
883 c &= 0xffff; /* Strip high bits */
885 while(str[0] && str[1] && len--)
891 str[0] = '\0'; /* FIXME: OK to shorten? */
896 /*********************************************************************
897 * _mbsnccnt(MSVCRT.@)
898 * 'c' is for 'character'.
 *
 * Count characters in str, limited by len. The multibyte path walks the
 * string while data and len remain, treating lead bytes specially; the
 * single-byte path returns min(ret, len), where ret is presumably the
 * string length - TODO confirm.
900 MSVCRT_size_t _mbsnccnt(const unsigned char* str, MSVCRT_size_t len)
903 if(MSVCRT___mb_cur_max > 1)
906 while(*str && len-- > 0)
908 if(MSVCRT_isleadbyte(*str))
921 return min(ret, len); /* ASCII CP */
924 /*********************************************************************
925 * _mbsnbcnt(MSVCRT.@)
926 * 'b' is for byte count.
 *
 * Count bytes in str, limited by len. The multibyte path walks a copy of
 * the pointer (xstr) while data and len remain, treating lead bytes
 * specially; the single-byte path returns min(ret, len), where ret is
 * presumably the string length - TODO confirm.
928 MSVCRT_size_t _mbsnbcnt(const unsigned char* str, MSVCRT_size_t len)
931 if(MSVCRT___mb_cur_max > 1)
933 const unsigned char* xstr = str;
934 while(*xstr && len-- > 0)
936 if (MSVCRT_isleadbyte(*xstr++))
942 return min(ret, len); /* ASCII CP */
946 /*********************************************************************
947 * _mbsnbcat(MSVCRT.@)
 *
 * Append at most len bytes of src to dst. The multibyte path scans dst
 * for its end while checking lead bytes, overwrites a lead byte found
 * directly in front of the terminator, then copies bytes from src while
 * data and len remain. Single-byte code pages use strncat().
949 unsigned char* _mbsnbcat(unsigned char* dst, const unsigned char* src, MSVCRT_size_t len)
951 if(MSVCRT___mb_cur_max > 1)
955 if (MSVCRT_isleadbyte(*dst++)) {
959 /* as per msdn overwrite the lead byte in front of '\0' */
965 while (*src && len--) *dst++ = *src++;
969 return strncat(dst, src, len); /* ASCII CP */
973 /*********************************************************************
 * Append src to dst, limited by len. The multibyte path scans dst for
 * its end while checking lead bytes, then copies from src while data and
 * len remain, with lead bytes handled specially (presumably copying the
 * trail byte as well - TODO confirm). Single-byte code pages use
 * strncat().
976 unsigned char* _mbsncat(unsigned char* dst, const unsigned char* src, MSVCRT_size_t len)
978 if(MSVCRT___mb_cur_max > 1)
983 if (MSVCRT_isleadbyte(*dst++))
986 while (*src && len--)
989 if(MSVCRT_isleadbyte(*src++))
995 return strncat(dst, src, len); /* ASCII CP */
999 /*********************************************************************
 * Convert a multibyte string to lower case. In the multibyte path each
 * character is fetched with _mbsnextc() and folded with _mbctolower();
 * the converted character is assumed to occupy the same number of bytes.
1002 unsigned char* _mbslwr(unsigned char* s)
1006 if (MSVCRT___mb_cur_max > 1)
1012 c = _mbctolower(_mbsnextc(s));
1013 /* Note that I assume that the size of the character is unchanged */
1027 /*********************************************************************
 * Convert a multibyte string to upper case. In the multibyte path each
 * character is fetched with _mbsnextc() and folded with _mbctoupper();
 * the converted character is assumed to occupy the same number of bytes.
1030 unsigned char* _mbsupr(unsigned char* s)
1034 if (MSVCRT___mb_cur_max > 1)
1040 c = _mbctoupper(_mbsnextc(s));
1041 /* Note that I assume that the size of the character is unchanged */
1055 /*********************************************************************
1056 * _mbsspn (MSVCRT.@)
 *
 * Scan string for characters that belong to set. For a lead byte in
 * string the set is searched for an entry matching both bytes; other
 * bytes are searched for in set on their own. The result is presumably
 * the offset of the first character not found in set - TODO confirm.
1058 MSVCRT_size_t _mbsspn(const unsigned char* string, const unsigned char* set)
1060 const unsigned char *p, *q;
1062 for (p = string; *p; p++)
1064 if (MSVCRT_isleadbyte(*p))
1066 for (q = set; *q; q++)
1070 if ((*p == *q) && (p[1] == q[1]))
1078 for (q = set; *q; q++)
1085 /*********************************************************************
1086 * _mbscspn(MSVCRT.@)
1088 MSVCRT_size_t _mbscspn(const unsigned char* str, const unsigned char* cmp)
1090 if (MSVCRT___mb_cur_max > 1)
1091 FIXME("don't handle double character case\n");
1092 return strcspn(str, cmp);
1095 /*********************************************************************
1096 * _mbsrev (MSVCRT.@)
 *
 * Reverse a multibyte string character by character. A temporary buffer
 * of 2 * _mbslen(str) bytes is allocated with MSVCRT_malloc(); each
 * character is unpacked into a two-byte slot and the slots are then
 * written back in reverse order, with lead bytes handled specially.
 * NOTE(review): confirm that the allocation result is checked and that
 * the temporary buffer is released.
1098 unsigned char* _mbsrev(unsigned char* str)
1100 int i, len = _mbslen(str);
1101 unsigned char *p, *temp=MSVCRT_malloc(len*2);
1106 /* unpack multibyte string to temp buffer */
1108 for(i=0; i<len; i++)
1110 if (MSVCRT_isleadbyte(*p))
1122 /* repack it in the reverse order */
1124 for(i=len-1; i>=0; i--)
1126 if(MSVCRT_isleadbyte(temp[i*2]))
1142 /*********************************************************************
1143 * _mbspbrk (MSVCRT.@)
1145 unsigned char* _mbspbrk(const unsigned char* str, const unsigned char* accept)
1147 const unsigned char* p;
1151 for(p = accept; *p; p += (MSVCRT_isleadbyte(*p)?2:1) )
1154 if( !MSVCRT_isleadbyte(*p) || ( *(p+1) == *(str+1) ) )
1155 return (unsigned char*)str;
1157 str += (MSVCRT_isleadbyte(*str)?2:1);