2 * msvcrt.dll mbcs functions
4 * Copyright 1999 Alexandre Julliard
5 * Copyright 2000 Jon Griffths
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 * Not currently binary compatible with win32. MSVCRT_mbctype must be
23 * populated correctly and the ismb* functions should reference it.
27 #include "wine/unicode.h"
28 #include "wine/debug.h"
30 WINE_DEFAULT_DEBUG_CHANNEL(msvcrt);
/* Multibyte character-type table; its address is what __p__mbctype()
 * returns.  The note at the top of this file says the table still has
 * to be populated correctly and referenced by the ismb* functions. */
unsigned char MSVCRT_mbctype[257];

/* Starts out as 1.  The functions in this file only take their
 * multibyte code paths while this value is greater than 1. */
int MSVCRT___mb_cur_max = 1;
35 static MSVCRT_wchar_t msvcrt_mbc_to_wc(unsigned int ch)
45 mbch[0] = (ch >> 8) & 0xff;
49 if (!MultiByteToWideChar(msvcrt_current_lc_all_cp, 0, mbch, n_chars, &chW, 1))
51 WARN("MultiByteToWideChar failed on %x\n", ch);
57 /*********************************************************************
58 * __p__mbctype (MSVCRT.@)
60 unsigned char* __p__mbctype(void)
62 return MSVCRT_mbctype;
65 /*********************************************************************
66 * __p___mb_cur_max(MSVCRT.@)
68 int* __p___mb_cur_max(void)
70 return &MSVCRT___mb_cur_max;
73 /*********************************************************************
76 unsigned int _mbsnextc(const unsigned char* str)
78 if(MSVCRT___mb_cur_max > 1 && MSVCRT_isleadbyte(*str))
79 return *str << 8 | str[1];
80 return *str; /* ASCII CP or SB char */
83 /*********************************************************************
84 * _mbctolower(MSVCRT.@)
86 unsigned int _mbctolower(unsigned int c)
88 if (MSVCRT_isleadbyte(c))
90 FIXME("Handle MBC chars\n");
93 return tolower(c); /* ASCII CP or SB char */
96 /*********************************************************************
97 * _mbctoupper(MSVCRT.@)
99 unsigned int _mbctoupper(unsigned int c)
101 if (MSVCRT_isleadbyte(c))
103 FIXME("Handle MBC chars\n");
106 return toupper(c); /* ASCII CP or SB char */
109 /*********************************************************************
112 unsigned char* _mbsdec(const unsigned char* start, const unsigned char* cur)
114 if(MSVCRT___mb_cur_max > 1)
115 return (char *)(_ismbstrail(start,cur-1) ? cur - 2 : cur -1);
117 return (char *)cur - 1; /* ASCII CP or SB char */
120 /*********************************************************************
123 unsigned char* _mbsinc(const unsigned char* str)
125 if(MSVCRT___mb_cur_max > 1 && MSVCRT_isleadbyte(*str))
126 return (unsigned char*)str + 2; /* MB char */
128 return (unsigned char*)str + 1; /* ASCII CP or SB char */
131 /*********************************************************************
134 unsigned char* _mbsninc(const unsigned char* str, MSVCRT_size_t num)
138 if(MSVCRT___mb_cur_max > 1)
142 return (unsigned char*)str;
144 return (unsigned char*)str + num; /* ASCII CP */
/*********************************************************************
 *		_mbclen(MSVCRT.@)
 *
 * Return the byte length of the character at 'str': 2 when its first
 * byte is a lead byte according to MSVCRT_isleadbyte(), else 1.
 */
unsigned int _mbclen(const unsigned char* str)
{
  if (MSVCRT_isleadbyte(*str))
    return 2;

  return 1;
}
155 /*********************************************************************
158 int MSVCRT_mblen(const char* str, MSVCRT_size_t size)
160 if (str && *str && size)
162 if(MSVCRT___mb_cur_max == 1)
163 return 1; /* ASCII CP */
165 return !MSVCRT_isleadbyte(*str) ? 1 : (size>1 ? 2 : -1);
170 /*********************************************************************
173 MSVCRT_size_t _mbslen(const unsigned char* str)
175 if(MSVCRT___mb_cur_max > 1)
177 MSVCRT_size_t len = 0;
180 str += MSVCRT_isleadbyte(*str) ? 2 : 1;
185 return strlen(str); /* ASCII CP */
188 /*********************************************************************
189 * _mbstrlen(MSVCRT.@)
191 MSVCRT_size_t _mbstrlen(const char* str)
193 if(MSVCRT___mb_cur_max > 1)
195 MSVCRT_size_t len = 0;
198 /* FIXME: According to the documentation we are supposed to test for
199 * multi-byte character validity. Whatever that means
201 str += MSVCRT_isleadbyte(*str) ? 2 : 1;
206 return strlen(str); /* ASCII CP */
209 /*********************************************************************
212 void _mbccpy(unsigned char* dest, const unsigned char* src)
215 if(MSVCRT___mb_cur_max > 1 && MSVCRT_isleadbyte(*src))
216 *dest = *++src; /* MB char */
218 ERR("failure.. is this ok?\n");
221 /*********************************************************************
224 unsigned char* _mbsncpy(unsigned char* dst, const unsigned char* src, MSVCRT_size_t n)
228 if(MSVCRT___mb_cur_max > 1)
230 unsigned char* ret = dst;
234 if (MSVCRT_isleadbyte(*src++))
241 return strncpy(dst, src, n); /* ASCII CP */
244 /*********************************************************************
245 * _mbsnbcpy(MSVCRT.@)
247 unsigned char* _mbsnbcpy(unsigned char* dst, const unsigned char* src, MSVCRT_size_t n)
251 if(MSVCRT___mb_cur_max > 1)
253 unsigned char* ret = dst;
254 while (*src && (n-- > 1))
257 if (MSVCRT_isleadbyte(*src++))
263 if (*src && n && !MSVCRT_isleadbyte(*src))
265 /* If the last character is a multi-byte character then
266 * we cannot copy it since we have only one byte left
275 return strncpy(dst, src, n); /* ASCII CP */
278 /*********************************************************************
281 int _mbscmp(const unsigned char* str, const unsigned char* cmp)
283 if(MSVCRT___mb_cur_max > 1)
285 unsigned int strc, cmpc;
288 return *cmp ? -1 : 0;
291 strc = _mbsnextc(str);
292 cmpc = _mbsnextc(cmp);
294 return strc < cmpc ? -1 : 1;
295 str +=(strc > 255) ? 2 : 1;
296 cmp +=(strc > 255) ? 2 : 1; /* equal, use same increment */
299 return strcmp(str, cmp); /* ASCII CP */
302 /*********************************************************************
303 * _mbsicoll(MSVCRT.@)
304 * FIXME: handle locales.
306 int _mbsicoll(const unsigned char* str, const unsigned char* cmp)
308 if(MSVCRT___mb_cur_max > 1)
310 unsigned int strc, cmpc;
313 return *cmp ? -1 : 0;
316 strc = _mbctolower(_mbsnextc(str));
317 cmpc = _mbctolower(_mbsnextc(cmp));
319 return strc < cmpc ? -1 : 1;
320 str +=(strc > 255) ? 2 : 1;
321 cmp +=(strc > 255) ? 2 : 1; /* equal, use same increment */
324 return strcasecmp(str, cmp); /* ASCII CP */
328 /*********************************************************************
331 int _mbsicmp(const unsigned char* str, const unsigned char* cmp)
333 if(MSVCRT___mb_cur_max > 1)
335 unsigned int strc, cmpc;
338 return *cmp ? -1 : 0;
341 strc = _mbctolower(_mbsnextc(str));
342 cmpc = _mbctolower(_mbsnextc(cmp));
344 return strc < cmpc ? -1 : 1;
345 str +=(strc > 255) ? 2 : 1;
346 cmp +=(strc > 255) ? 2 : 1; /* equal, use same increment */
349 return strcasecmp(str, cmp); /* ASCII CP */
352 /*********************************************************************
355 int _mbsncmp(const unsigned char* str, const unsigned char* cmp, MSVCRT_size_t len)
360 if(MSVCRT___mb_cur_max > 1)
362 unsigned int strc, cmpc;
367 return *cmp ? -1 : 0;
370 strc = _mbsnextc(str);
371 cmpc = _mbsnextc(cmp);
373 return strc < cmpc ? -1 : 1;
374 inc=(strc > 255) ? 2 : 1; /* Equal, use same increment */
378 return 0; /* Matched len chars */
380 return strncmp(str, cmp, len); /* ASCII CP */
383 /*********************************************************************
384 * _mbsnbcmp(MSVCRT.@)
386 int _mbsnbcmp(const unsigned char* str, const unsigned char* cmp, MSVCRT_size_t len)
390 if(MSVCRT___mb_cur_max > 1)
392 unsigned int strc, cmpc;
397 return *cmp ? -1 : 0;
400 if (MSVCRT_isleadbyte(*str))
402 strc=(len>=2)?_mbsnextc(str):0;
410 if (MSVCRT_isleadbyte(*cmp))
411 cmpc=(len>=2)?_mbsnextc(cmp):0;
415 return strc < cmpc ? -1 : 1;
420 return 0; /* Matched len chars */
422 return strncmp(str,cmp,len);
425 /*********************************************************************
426 * _mbsnicmp(MSVCRT.@)
428 * Compare two multibyte strings case insensitively to 'len' characters.
430 int _mbsnicmp(const unsigned char* str, const unsigned char* cmp, MSVCRT_size_t len)
432 /* FIXME: No tolower() for mb strings yet */
433 if(MSVCRT___mb_cur_max > 1)
435 unsigned int strc, cmpc;
439 return *cmp ? -1 : 0;
442 strc = _mbctolower(_mbsnextc(str));
443 cmpc = _mbctolower(_mbsnextc(cmp));
445 return strc < cmpc ? -1 : 1;
446 str +=(strc > 255) ? 2 : 1;
447 cmp +=(strc > 255) ? 2 : 1; /* Equal, use same increment */
449 return 0; /* Matched len chars */
451 return strncasecmp(str, cmp, len); /* ASCII CP */
454 /*********************************************************************
455 * _mbsnbicmp(MSVCRT.@)
457 int _mbsnbicmp(const unsigned char* str, const unsigned char* cmp, MSVCRT_size_t len)
461 if(MSVCRT___mb_cur_max > 1)
463 unsigned int strc, cmpc;
468 return *cmp ? -1 : 0;
471 if (MSVCRT_isleadbyte(*str))
473 strc=(len>=2)?_mbsnextc(str):0;
481 if (MSVCRT_isleadbyte(*cmp))
482 cmpc=(len>=2)?_mbsnextc(cmp):0;
485 strc = _mbctolower(strc);
486 cmpc = _mbctolower(cmpc);
488 return strc < cmpc ? -1 : 1;
493 return 0; /* Matched len bytes */
495 return strncmp(str,cmp,len);
498 /*********************************************************************
501 * Find a multibyte character in a multibyte string.
503 unsigned char* _mbschr(const unsigned char* s, unsigned int x)
505 if(MSVCRT___mb_cur_max > 1)
512 return (unsigned char*)s;
515 s += c > 255 ? 2 : 1;
518 return strchr(s, x); /* ASCII CP */
521 /*********************************************************************
524 unsigned char* _mbsrchr(const unsigned char* s, unsigned int x)
526 if(MSVCRT___mb_cur_max > 1)
529 unsigned char* match=NULL;
535 match=(unsigned char*)s;
538 s +=(c > 255) ? 2 : 1;
544 /*********************************************************************
547 * Find and extract tokens from strings
549 unsigned char* _mbstok(unsigned char *str, const unsigned char *delim)
551 thread_data_t *data = msvcrt_get_thread_data();
554 if(MSVCRT___mb_cur_max > 1)
559 if (!(str = data->mbstok_next)) return NULL;
561 while ((c = _mbsnextc(str)) && _mbschr(delim, c)) {
562 str += c > 255 ? 2 : 1;
564 if (!*str) return NULL;
566 while ((c = _mbsnextc(str)) && !_mbschr(delim, c)) {
567 str += c > 255 ? 2 : 1;
571 if (c > 255) *str++ = 0;
573 data->mbstok_next = str;
576 return strtok(str, delim); /* ASCII CP */
579 /*********************************************************************
582 int MSVCRT_mbtowc(MSVCRT_wchar_t *dst, const char* str, MSVCRT_size_t n)
584 /* temp var needed because MultiByteToWideChar wants non NULL destination */
585 MSVCRT_wchar_t tmpdst = '\0';
589 if(!MultiByteToWideChar(CP_ACP, 0, str, n, &tmpdst, 1))
593 /* return the number of bytes from src that have been used */
596 if(n >= 2 && MSVCRT_isleadbyte(*str) && str[1])
601 /*********************************************************************
602 * _mbbtombc(MSVCRT.@)
604 unsigned int _mbbtombc(unsigned int c)
606 if(MSVCRT___mb_cur_max > 1 &&
607 ((c >= 0x20 && c <=0x7e) ||(c >= 0xa1 && c <= 0xdf)))
609 /* FIXME: I can't get this function to return anything
610 * different from what I pass it...
613 return c; /* ASCII CP or no MB char */
616 /*********************************************************************
617 * _ismbbkana(MSVCRT.@)
619 int _ismbbkana(unsigned int c)
621 /* FIXME: use lc_ctype when supported, not lc_all */
622 if(msvcrt_current_lc_all_cp == 932)
624 /* Japanese/Katakana, CP 932 */
625 return (c >= 0xa1 && c <= 0xdf);
630 /*********************************************************************
631 * _ismbcdigit(MSVCRT.@)
633 int _ismbcdigit(unsigned int ch)
635 MSVCRT_wchar_t wch = msvcrt_mbc_to_wc( ch );
636 return (get_char_typeW( wch ) & C1_DIGIT);
639 /*********************************************************************
640 * _ismbcgraph(MSVCRT.@)
642 int _ismbcgraph(unsigned int ch)
644 MSVCRT_wchar_t wch = msvcrt_mbc_to_wc( ch );
645 return (get_char_typeW( wch ) & (C1_UPPER | C1_LOWER | C1_DIGIT | C1_PUNCT | C1_ALPHA));
648 /*********************************************************************
649 * _ismbcalpha (MSVCRT.@)
651 int _ismbcalpha(unsigned int ch)
653 MSVCRT_wchar_t wch = msvcrt_mbc_to_wc( ch );
654 return (get_char_typeW( wch ) & C1_ALPHA);
657 /*********************************************************************
658 * _ismbclower (MSVCRT.@)
660 int _ismbclower(unsigned int ch)
662 MSVCRT_wchar_t wch = msvcrt_mbc_to_wc( ch );
663 return (get_char_typeW( wch ) & C1_UPPER);
666 /*********************************************************************
667 * _ismbcupper (MSVCRT.@)
669 int _ismbcupper(unsigned int ch)
671 MSVCRT_wchar_t wch = msvcrt_mbc_to_wc( ch );
672 return (get_char_typeW( wch ) & C1_LOWER);
675 /*********************************************************************
676 * _ismbcsymbol(MSVCRT.@)
678 int _ismbcsymbol(unsigned int ch)
680 MSVCRT_wchar_t wch = msvcrt_mbc_to_wc( ch );
682 if (!GetStringTypeW(CT_CTYPE3, &wch, 1, &ctype))
684 WARN("GetStringTypeW failed on %x\n", ch);
687 return ((ctype & C3_SYMBOL) != 0);
690 /*********************************************************************
691 * _ismbcalnum (MSVCRT.@)
693 int _ismbcalnum(unsigned int ch)
695 MSVCRT_wchar_t wch = msvcrt_mbc_to_wc( ch );
696 return (get_char_typeW( wch ) & (C1_ALPHA | C1_DIGIT));
699 /*********************************************************************
700 * _ismbcspace (MSVCRT.@)
702 int _ismbcspace(unsigned int ch)
704 MSVCRT_wchar_t wch = msvcrt_mbc_to_wc( ch );
705 return (get_char_typeW( wch ) & C1_SPACE);
708 /*********************************************************************
709 * _ismbcprint (MSVCRT.@)
711 int _ismbcprint(unsigned int ch)
713 MSVCRT_wchar_t wch = msvcrt_mbc_to_wc( ch );
714 return (get_char_typeW( wch ) & (C1_UPPER | C1_LOWER | C1_DIGIT | C1_PUNCT | C1_ALPHA | C1_SPACE));
717 /*********************************************************************
718 * _ismbcpunct(MSVCRT.@)
720 int _ismbcpunct(unsigned int ch)
722 MSVCRT_wchar_t wch = msvcrt_mbc_to_wc( ch );
723 return (get_char_typeW( wch ) & C1_PUNCT);
726 /*********************************************************************
727 * _ismbchira(MSVCRT.@)
729 int _ismbchira(unsigned int c)
731 /* FIXME: use lc_ctype when supported, not lc_all */
732 if(msvcrt_current_lc_all_cp == 932)
734 /* Japanese/Hiragana, CP 932 */
735 return (c >= 0x829f && c <= 0x82f1);
740 /*********************************************************************
741 * _ismbckata(MSVCRT.@)
743 int _ismbckata(unsigned int c)
745 /* FIXME: use lc_ctype when supported, not lc_all */
746 if(msvcrt_current_lc_all_cp == 932)
749 return _ismbbkana(c);
750 /* Japanese/Katakana, CP 932 */
751 return (c >= 0x8340 && c <= 0x8396 && c != 0x837f);
756 /*********************************************************************
757 * _ismbblead(MSVCRT.@)
759 int _ismbblead(unsigned int c)
761 /* FIXME: should reference MSVCRT_mbctype */
762 return MSVCRT___mb_cur_max > 1 && MSVCRT_isleadbyte(c);
/*********************************************************************
 *		_ismbbtrail(MSVCRT.@)
 *
 * Return 1 for every byte that _ismbblead() rejects and 0 for every
 * byte it accepts.
 */
int _ismbbtrail(unsigned int c)
{
  /* FIXME: should reference MSVCRT_mbctype */
  if (_ismbblead(c))
    return 0;

  return 1;
}
775 /*********************************************************************
776 * _ismbslead(MSVCRT.@)
778 int _ismbslead(const unsigned char* start, const unsigned char* str)
780 /* Lead bytes can also be trail bytes if caller messed up
781 * iterating through the string...
783 if(MSVCRT___mb_cur_max > 1)
786 start += MSVCRT_isleadbyte(*str) ? 2 : 1;
789 return MSVCRT_isleadbyte(*str);
791 return 0; /* Must have been a trail, we skipped it */
/*********************************************************************
 *		_ismbstrail(MSVCRT.@)
 *
 * Test whether the byte at 'str' is a trail byte within the string
 * that begins at 'start'.
 *
 * RETURNS
 *  Non-zero when 'str' is past 'start', _ismbslead() rejects it and
 *  the byte before it is a lead byte; 0 otherwise.
 */
int _ismbstrail(const unsigned char* start, const unsigned char* str)
{
  /* The first byte of the string has nothing in front of it, so it
   * cannot be a trail byte; return before str[-1] is read out of bounds */
  if (str <= start)
    return 0;

  /* Must not be a lead, and must be preceded by one */
  return !_ismbslead(start, str) && MSVCRT_isleadbyte(str[-1]);
}
803 /*********************************************************************
806 unsigned char* _mbsset(unsigned char* str, unsigned int c)
808 unsigned char* ret = str;
810 if(MSVCRT___mb_cur_max == 1 || c < 256)
811 return _strset(str, c); /* ASCII CP or SB char */
813 c &= 0xffff; /* Strip high bits */
815 while(str[0] && str[1])
821 str[0] = '\0'; /* FIXME: OK to shorten? */
826 /*********************************************************************
827 * _mbsnbset(MSVCRT.@)
829 unsigned char* _mbsnbset(unsigned char *str, unsigned int c, MSVCRT_size_t len)
831 unsigned char *ret = str;
836 if(MSVCRT___mb_cur_max == 1 || c < 256)
837 return _strnset(str, c, len); /* ASCII CP or SB char */
839 c &= 0xffff; /* Strip high bits */
841 while(str[0] && str[1] && (len > 1))
849 /* as per msdn pad with a blank character */
856 /*********************************************************************
859 unsigned char* _mbsnset(unsigned char* str, unsigned int c, MSVCRT_size_t len)
861 unsigned char *ret = str;
866 if(MSVCRT___mb_cur_max == 1 || c < 256)
867 return _strnset(str, c, len); /* ASCII CP or SB char */
869 c &= 0xffff; /* Strip high bits */
871 while(str[0] && str[1] && len--)
877 str[0] = '\0'; /* FIXME: OK to shorten? */
882 /*********************************************************************
883 * _mbsnccnt(MSVCRT.@)
884 * 'c' is for 'character'.
886 MSVCRT_size_t _mbsnccnt(const unsigned char* str, MSVCRT_size_t len)
889 if(MSVCRT___mb_cur_max > 1)
892 while(*str && len-- > 0)
894 if(MSVCRT_isleadbyte(*str))
907 return min(ret, len); /* ASCII CP */
910 /*********************************************************************
911 * _mbsnbcnt(MSVCRT.@)
912 * 'b' is for byte count.
914 MSVCRT_size_t _mbsnbcnt(const unsigned char* str, MSVCRT_size_t len)
917 if(MSVCRT___mb_cur_max > 1)
919 const unsigned char* xstr = str;
920 while(*xstr && len-- > 0)
922 if (MSVCRT_isleadbyte(*xstr++))
928 return min(ret, len); /* ASCII CP */
932 /*********************************************************************
933 * _mbsnbcat(MSVCRT.@)
935 unsigned char* _mbsnbcat(unsigned char* dst, const unsigned char* src, MSVCRT_size_t len)
937 if(MSVCRT___mb_cur_max > 1)
941 if (MSVCRT_isleadbyte(*dst++)) {
945 /* as per msdn overwrite the lead byte in front of '\0' */
951 while (*src && len--) *dst++ = *src++;
955 return strncat(dst, src, len); /* ASCII CP */
959 /*********************************************************************
962 unsigned char* _mbsncat(unsigned char* dst, const unsigned char* src, MSVCRT_size_t len)
964 if(MSVCRT___mb_cur_max > 1)
969 if (MSVCRT_isleadbyte(*dst++))
972 while (*src && len--)
975 if(MSVCRT_isleadbyte(*src++))
981 return strncat(dst, src, len); /* ASCII CP */
985 /*********************************************************************
988 unsigned char* _mbslwr(unsigned char* s)
992 if (MSVCRT___mb_cur_max > 1)
998 c = _mbctolower(_mbsnextc(s));
999 /* Note that I assume that the size of the character is unchanged */
1013 /*********************************************************************
1016 unsigned char* _mbsupr(unsigned char* s)
1020 if (MSVCRT___mb_cur_max > 1)
1026 c = _mbctoupper(_mbsnextc(s));
1027 /* Note that I assume that the size of the character is unchanged */
1041 /*********************************************************************
1042 * _mbsspn (MSVCRT.@)
1044 MSVCRT_size_t _mbsspn(const unsigned char* string, const unsigned char* set)
1046 const unsigned char *p, *q;
1048 for (p = string; *p; p++)
1050 if (MSVCRT_isleadbyte(*p))
1052 for (q = set; *q; q++)
1056 if ((*p == *q) && (p[1] == q[1]))
1064 for (q = set; *q; q++)
1071 /*********************************************************************
1072 * _mbscspn(MSVCRT.@)
1074 MSVCRT_size_t _mbscspn(const unsigned char* str, const unsigned char* cmp)
1076 if (MSVCRT___mb_cur_max > 1)
1077 FIXME("don't handle double character case\n");
1078 return strcspn(str, cmp);
1081 /*********************************************************************
1082 * _mbsrev (MSVCRT.@)
1084 unsigned char* _mbsrev(unsigned char* str)
1086 int i, len = _mbslen(str);
1087 unsigned char *p, *temp=MSVCRT_malloc(len*2);
1092 /* unpack multibyte string to temp buffer */
1094 for(i=0; i<len; i++)
1096 if (MSVCRT_isleadbyte(*p))
1108 /* repack it in the reverse order */
1110 for(i=len-1; i>=0; i--)
1112 if(MSVCRT_isleadbyte(temp[i*2]))
1128 /*********************************************************************
1129 * _mbspbrk (MSVCRT.@)
1131 unsigned char* _mbspbrk(const unsigned char* str, const unsigned char* accept)
1133 const unsigned char* p;
1137 for(p = accept; *p; p += (MSVCRT_isleadbyte(*p)?2:1) )
1140 if( !MSVCRT_isleadbyte(*p) || ( *(p+1) == *(str+1) ) )
1141 return (unsigned char*)str;
1143 str += (MSVCRT_isleadbyte(*str)?2:1);