2 * msvcrt.dll mbcs functions
4 * Copyright 1999 Alexandre Julliard
5 * Copyright 2000 Jon Griffths
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 * Not currently binary compatible with win32. MSVCRT_mbctype must be
23 * populated correctly and the ismb* functions should reference it.
28 #include "msvcrt/mbctype.h"
29 #include "msvcrt/mbstring.h"
30 #include "msvcrt/stdlib.h"
31 #include "msvcrt/string.h"
32 #include "msvcrt/wctype.h"
34 #include "wine/unicode.h"
35 #include "wine/debug.h"
37 WINE_DEFAULT_DEBUG_CHANNEL(msvcrt);
/* Multibyte character type table (257 entries); its address is handed out
 * by __p__mbctype(). */
unsigned char MSVCRT_mbctype[257];

/* The mbcs functions in this file take their multibyte code paths only when
 * this value is greater than 1; it starts out as 1 (single-byte code page). */
int MSVCRT___mb_cur_max = 1;
42 static MSVCRT_wchar_t msvcrt_mbc_to_wc(unsigned int ch)
52 mbch[0] = (ch >> 8) & 0xff;
56 if (!MultiByteToWideChar(MSVCRT_current_lc_all_cp, 0, mbch, n_chars, &chW, 1))
58 WARN("MultiByteToWideChar failed on %x\n", ch);
64 /*********************************************************************
65 * __p__mbctype (MSVCRT.@)
67 unsigned char* __p__mbctype(void)
69 return MSVCRT_mbctype;
72 /*********************************************************************
73 * __p___mb_cur_max(MSVCRT.@)
75 int* __p___mb_cur_max(void)
77 return &MSVCRT___mb_cur_max;
80 /*********************************************************************
83 unsigned int _mbsnextc(const unsigned char* str)
85 if(MSVCRT___mb_cur_max > 1 && MSVCRT_isleadbyte(*str))
86 return *str << 8 | str[1];
87 return *str; /* ASCII CP or SB char */
/*********************************************************************
 *		_mbctolower(MSVCRT.@)
 *
 * Convert character code 'c' to lower case.
 *
 * Multibyte characters are not handled yet: when 'c' tests as a lead byte
 * a FIXME is logged and 'c' is returned unchanged.
 */
unsigned int _mbctolower(unsigned int c)
{
  if (MSVCRT_isleadbyte(c))
  {
    FIXME("Handle MBC chars\n");
    return c;
  }
  return tolower(c); /* ASCII CP or SB char */
}
/*********************************************************************
 *		_mbctoupper(MSVCRT.@)
 *
 * Convert character code 'c' to upper case.
 *
 * Multibyte characters are not handled yet: when 'c' tests as a lead byte
 * a FIXME is logged and 'c' is returned unchanged.
 */
unsigned int _mbctoupper(unsigned int c)
{
  if (MSVCRT_isleadbyte(c))
  {
    FIXME("Handle MBC chars\n");
    return c;
  }
  return toupper(c); /* ASCII CP or SB char */
}
116 /*********************************************************************
119 unsigned char* _mbsdec(const unsigned char* start, const unsigned char* cur)
121 if(MSVCRT___mb_cur_max > 1)
122 return (char *)(_ismbstrail(start,cur-1) ? cur - 2 : cur -1);
124 return (char *)cur - 1; /* ASCII CP or SB char */
127 /*********************************************************************
130 unsigned char* _mbsinc(const unsigned char* str)
132 if(MSVCRT___mb_cur_max > 1 && MSVCRT_isleadbyte(*str))
133 return (unsigned char*)str + 2; /* MB char */
135 return (unsigned char*)str + 1; /* ASCII CP or SB char */
138 /*********************************************************************
141 unsigned char* _mbsninc(const unsigned char* str, MSVCRT_size_t num)
145 if(MSVCRT___mb_cur_max > 1)
149 return (unsigned char*)str;
151 return (unsigned char*)str + num; /* ASCII CP */
/*********************************************************************
 *		_mbclen(MSVCRT.@)
 *
 * Return the length in bytes of the character at 'str': 2 when its first
 * byte is a lead byte, 1 otherwise.
 */
unsigned int _mbclen(const unsigned char* str)
{
  if(MSVCRT_isleadbyte(*str))
    return 2;
  return 1;
}
162 /*********************************************************************
165 int MSVCRT_mblen(const char* str, MSVCRT_size_t size)
167 if (str && *str && size)
169 if(MSVCRT___mb_cur_max == 1)
170 return 1; /* ASCII CP */
172 return !MSVCRT_isleadbyte(*str) ? 1 : (size>1 ? 2 : -1);
177 /*********************************************************************
180 MSVCRT_size_t _mbslen(const unsigned char* str)
182 if(MSVCRT___mb_cur_max > 1)
184 MSVCRT_size_t len = 0;
187 str += MSVCRT_isleadbyte(*str) ? 2 : 1;
192 return strlen(str); /* ASCII CP */
195 /*********************************************************************
196 * _mbstrlen(MSVCRT.@)
198 MSVCRT_size_t _mbstrlen(const char* str)
200 if(MSVCRT___mb_cur_max > 1)
202 MSVCRT_size_t len = 0;
205 /* FIXME: According to the documentation we are supposed to test for
206 * multi-byte character validity. Whatever that means
208 str += MSVCRT_isleadbyte(*str) ? 2 : 1;
213 return strlen(str); /* ASCII CP */
216 /*********************************************************************
219 void _mbccpy(unsigned char* dest, const unsigned char* src)
222 if(MSVCRT___mb_cur_max > 1 && MSVCRT_isleadbyte(*src))
223 *dest = *++src; /* MB char */
225 ERR("failure.. is this ok?\n");
228 /*********************************************************************
231 unsigned char* _mbsncpy(unsigned char* dst, const unsigned char* src, MSVCRT_size_t n)
235 if(MSVCRT___mb_cur_max > 1)
237 unsigned char* ret = dst;
241 if (MSVCRT_isleadbyte(*src++))
248 return strncpy(dst, src, n); /* ASCII CP */
251 /*********************************************************************
252 * _mbsnbcpy(MSVCRT.@)
254 unsigned char* _mbsnbcpy(unsigned char* dst, const unsigned char* src, MSVCRT_size_t n)
258 if(MSVCRT___mb_cur_max > 1)
260 unsigned char* ret = dst;
261 while (*src && (n-- > 1))
264 if (MSVCRT_isleadbyte(*src++))
270 if (*src && n && !MSVCRT_isleadbyte(*src))
272 /* If the last character is a multi-byte character then
273 * we cannot copy it since we have only one byte left
282 return strncpy(dst, src, n); /* ASCII CP */
285 /*********************************************************************
288 int _mbscmp(const unsigned char* str, const unsigned char* cmp)
290 if(MSVCRT___mb_cur_max > 1)
292 unsigned int strc, cmpc;
295 return *cmp ? -1 : 0;
298 strc = _mbsnextc(str);
299 cmpc = _mbsnextc(cmp);
301 return strc < cmpc ? -1 : 1;
302 str +=(strc > 255) ? 2 : 1;
303 cmp +=(strc > 255) ? 2 : 1; /* equal, use same increment */
306 return strcmp(str, cmp); /* ASCII CP */
309 /*********************************************************************
310 * _mbsicoll(MSVCRT.@)
311 * FIXME: handle locales.
313 int _mbsicoll(const unsigned char* str, const unsigned char* cmp)
315 if(MSVCRT___mb_cur_max > 1)
317 unsigned int strc, cmpc;
320 return *cmp ? -1 : 0;
323 strc = _mbctolower(_mbsnextc(str));
324 cmpc = _mbctolower(_mbsnextc(cmp));
326 return strc < cmpc ? -1 : 1;
327 str +=(strc > 255) ? 2 : 1;
328 cmp +=(strc > 255) ? 2 : 1; /* equal, use same increment */
331 return strcasecmp(str, cmp); /* ASCII CP */
335 /*********************************************************************
338 int _mbsicmp(const unsigned char* str, const unsigned char* cmp)
340 if(MSVCRT___mb_cur_max > 1)
342 unsigned int strc, cmpc;
345 return *cmp ? -1 : 0;
348 strc = _mbctolower(_mbsnextc(str));
349 cmpc = _mbctolower(_mbsnextc(cmp));
351 return strc < cmpc ? -1 : 1;
352 str +=(strc > 255) ? 2 : 1;
353 cmp +=(strc > 255) ? 2 : 1; /* equal, use same increment */
356 return strcasecmp(str, cmp); /* ASCII CP */
359 /*********************************************************************
362 int _mbsncmp(const unsigned char* str, const unsigned char* cmp, MSVCRT_size_t len)
367 if(MSVCRT___mb_cur_max > 1)
369 unsigned int strc, cmpc;
374 return *cmp ? -1 : 0;
377 strc = _mbsnextc(str);
378 cmpc = _mbsnextc(cmp);
380 return strc < cmpc ? -1 : 1;
381 inc=(strc > 255) ? 2 : 1; /* Equal, use same increment */
385 return 0; /* Matched len chars */
387 return strncmp(str, cmp, len); /* ASCII CP */
390 /*********************************************************************
391 * _mbsnbcmp(MSVCRT.@)
393 int _mbsnbcmp(const unsigned char* str, const unsigned char* cmp, MSVCRT_size_t len)
397 if(MSVCRT___mb_cur_max > 1)
399 unsigned int strc, cmpc;
404 return *cmp ? -1 : 0;
407 if (MSVCRT_isleadbyte(*str))
409 strc=(len>=2)?_mbsnextc(str):0;
417 if (MSVCRT_isleadbyte(*cmp))
418 cmpc=(len>=2)?_mbsnextc(cmp):0;
422 return strc < cmpc ? -1 : 1;
427 return 0; /* Matched len chars */
429 return strncmp(str,cmp,len);
432 /*********************************************************************
433 * _mbsnicmp(MSVCRT.@)
435 * Compare two multibyte strings case insensitively to 'len' characters.
437 int _mbsnicmp(const unsigned char* str, const unsigned char* cmp, MSVCRT_size_t len)
439 /* FIXME: No tolower() for mb strings yet */
440 if(MSVCRT___mb_cur_max > 1)
442 unsigned int strc, cmpc;
446 return *cmp ? -1 : 0;
449 strc = _mbctolower(_mbsnextc(str));
450 cmpc = _mbctolower(_mbsnextc(cmp));
452 return strc < cmpc ? -1 : 1;
453 str +=(strc > 255) ? 2 : 1;
454 cmp +=(strc > 255) ? 2 : 1; /* Equal, use same increment */
456 return 0; /* Matched len chars */
458 return strncasecmp(str, cmp, len); /* ASCII CP */
461 /*********************************************************************
462 * _mbsnbicmp(MSVCRT.@)
464 int _mbsnbicmp(const unsigned char* str, const unsigned char* cmp, MSVCRT_size_t len)
468 if(MSVCRT___mb_cur_max > 1)
470 unsigned int strc, cmpc;
475 return *cmp ? -1 : 0;
478 if (MSVCRT_isleadbyte(*str))
480 strc=(len>=2)?_mbsnextc(str):0;
488 if (MSVCRT_isleadbyte(*cmp))
489 cmpc=(len>=2)?_mbsnextc(cmp):0;
492 strc = _mbctolower(strc);
493 cmpc = _mbctolower(cmpc);
495 return strc < cmpc ? -1 : 1;
500 return 0; /* Matched len bytes */
502 return strncmp(str,cmp,len);
505 /*********************************************************************
508 * Find a multibyte character in a multibyte string.
510 unsigned char* _mbschr(const unsigned char* s, unsigned int x)
512 if(MSVCRT___mb_cur_max > 1)
519 return (unsigned char*)s;
522 s += c > 255 ? 2 : 1;
525 return strchr(s, x); /* ASCII CP */
528 /*********************************************************************
531 unsigned char* _mbsrchr(const unsigned char* s, unsigned int x)
533 if(MSVCRT___mb_cur_max > 1)
536 unsigned char* match=NULL;
542 match=(unsigned char*)s;
545 s +=(c > 255) ? 2 : 1;
551 /*********************************************************************
554 * Find and extract tokens from strings
556 unsigned char* _mbstok(unsigned char *str, const unsigned char *delim)
558 MSVCRT_thread_data *data = msvcrt_get_thread_data();
561 if(MSVCRT___mb_cur_max > 1)
566 if (!(str = data->mbstok_next)) return NULL;
568 while ((c = _mbsnextc(str)) && _mbschr(delim, c)) {
569 str += c > 255 ? 2 : 1;
571 if (!*str) return NULL;
573 while ((c = _mbsnextc(str)) && !_mbschr(delim, c)) {
574 str += c > 255 ? 2 : 1;
578 if (c > 255) *str++ = 0;
580 data->mbstok_next = str;
583 return strtok(str, delim); /* ASCII CP */
586 /*********************************************************************
589 int MSVCRT_mbtowc(MSVCRT_wchar_t *dst, const char* str, MSVCRT_size_t n)
591 /* temp var needed because MultiByteToWideChar wants non NULL destination */
592 MSVCRT_wchar_t tmpdst = '\0';
596 if(!MultiByteToWideChar(CP_ACP, 0, str, n, &tmpdst, 1))
600 /* return the number of bytes from src that have been used */
603 if(n >= 2 && MSVCRT_isleadbyte(*str) && str[1])
608 /*********************************************************************
609 * _mbbtombc(MSVCRT.@)
611 unsigned int _mbbtombc(unsigned int c)
613 if(MSVCRT___mb_cur_max > 1 &&
614 ((c >= 0x20 && c <=0x7e) ||(c >= 0xa1 && c <= 0xdf)))
616 /* FIXME: I can't get this function to return anything
617 * different to what I pass it...
620 return c; /* ASCII CP or no MB char */
623 /*********************************************************************
624 * _ismbbkana(MSVCRT.@)
626 int _ismbbkana(unsigned int c)
628 /* FIXME: use lc_ctype when supported, not lc_all */
629 if(MSVCRT_current_lc_all_cp == 932)
631 /* Japanese/Katakana, CP 932 */
632 return (c >= 0xa1 && c <= 0xdf);
637 /*********************************************************************
638 * _ismbcdigit(MSVCRT.@)
640 int _ismbcdigit(unsigned int ch)
642 MSVCRT_wchar_t wch = msvcrt_mbc_to_wc( ch );
643 return (get_char_typeW( wch ) & C1_DIGIT);
646 /*********************************************************************
647 * _ismbcgraph(MSVCRT.@)
649 int _ismbcgraph(unsigned int ch)
651 MSVCRT_wchar_t wch = msvcrt_mbc_to_wc( ch );
652 return (get_char_typeW( wch ) & (C1_UPPER | C1_LOWER | C1_DIGIT | C1_PUNCT | C1_ALPHA));
655 /*********************************************************************
656 * _ismbcalpha (MSVCRT.@)
658 int _ismbcalpha(unsigned int ch)
660 MSVCRT_wchar_t wch = msvcrt_mbc_to_wc( ch );
661 return (get_char_typeW( wch ) & C1_ALPHA);
664 /*********************************************************************
665 * _ismbclower (MSVCRT.@)
667 int _ismbclower(unsigned int ch)
669 MSVCRT_wchar_t wch = msvcrt_mbc_to_wc( ch );
670 return (get_char_typeW( wch ) & C1_UPPER);
673 /*********************************************************************
674 * _ismbcupper (MSVCRT.@)
676 int _ismbcupper(unsigned int ch)
678 MSVCRT_wchar_t wch = msvcrt_mbc_to_wc( ch );
679 return (get_char_typeW( wch ) & C1_LOWER);
682 /*********************************************************************
683 * _ismbcsymbol(MSVCRT.@)
685 int _ismbcsymbol(unsigned int ch)
687 MSVCRT_wchar_t wch = msvcrt_mbc_to_wc( ch );
689 if (!GetStringTypeW(CT_CTYPE3, &wch, 1, &ctype))
691 WARN("GetStringTypeW failed on %x\n", ch);
694 return ((ctype & C3_SYMBOL) != 0);
697 /*********************************************************************
698 * _ismbcalnum (MSVCRT.@)
700 int _ismbcalnum(unsigned int ch)
702 MSVCRT_wchar_t wch = msvcrt_mbc_to_wc( ch );
703 return (get_char_typeW( wch ) & (C1_ALPHA | C1_DIGIT));
706 /*********************************************************************
707 * _ismbcspace (MSVCRT.@)
709 int _ismbcspace(unsigned int ch)
711 MSVCRT_wchar_t wch = msvcrt_mbc_to_wc( ch );
712 return (get_char_typeW( wch ) & C1_SPACE);
715 /*********************************************************************
716 * _ismbcprint (MSVCRT.@)
718 int _ismbcprint(unsigned int ch)
720 MSVCRT_wchar_t wch = msvcrt_mbc_to_wc( ch );
721 return (get_char_typeW( wch ) & (C1_UPPER | C1_LOWER | C1_DIGIT | C1_PUNCT | C1_ALPHA | C1_SPACE));
724 /*********************************************************************
725 * _ismbcpunct(MSVCRT.@)
727 int _ismbcpunct(unsigned int ch)
729 MSVCRT_wchar_t wch = msvcrt_mbc_to_wc( ch );
730 return (get_char_typeW( wch ) & C1_PUNCT);
733 /*********************************************************************
734 * _ismbchira(MSVCRT.@)
736 int _ismbchira(unsigned int c)
738 /* FIXME: use lc_ctype when supported, not lc_all */
739 if(MSVCRT_current_lc_all_cp == 932)
741 /* Japanese/Hiragana, CP 932 */
742 return (c >= 0x829f && c <= 0x82f1);
747 /*********************************************************************
748 * _ismbckata(MSVCRT.@)
750 int _ismbckata(unsigned int c)
752 /* FIXME: use lc_ctype when supported, not lc_all */
753 if(MSVCRT_current_lc_all_cp == 932)
756 return _ismbbkana(c);
757 /* Japanese/Katakana, CP 932 */
758 return (c >= 0x8340 && c <= 0x8396 && c != 0x837f);
763 /*********************************************************************
764 * _ismbblead(MSVCRT.@)
766 int _ismbblead(unsigned int c)
768 /* FIXME: should reference MSVCRT_mbctype */
769 return MSVCRT___mb_cur_max > 1 && MSVCRT_isleadbyte(c);
/*********************************************************************
 *		_ismbbtrail(MSVCRT.@)
 *
 * Test whether byte 'c' is a trail byte.
 *
 * Current approximation: any byte that _ismbblead() does not report as a
 * lead byte is treated as a trail byte.
 */
int _ismbbtrail(unsigned int c)
{
  /* FIXME: should reference MSVCRT_mbctype */
  return !_ismbblead(c);
}
782 /*********************************************************************
783 * _ismbslead(MSVCRT.@)
785 int _ismbslead(const unsigned char* start, const unsigned char* str)
787 /* Lead bytes can also be trail bytes if caller messed up
788 * iterating through the string...
790 if(MSVCRT___mb_cur_max > 1)
793 start += MSVCRT_isleadbyte(*str) ? 2 : 1;
796 return MSVCRT_isleadbyte(*str);
798 return 0; /* Must have been a trail, we skipped it */
/*********************************************************************
 *		_ismbstrail(MSVCRT.@)
 *
 * Test whether the byte at 'str' is a trail byte within the string that
 * begins at 'start'.
 *
 * The first byte of the string can never be a trail byte, so str[-1] is
 * only inspected when 'str' lies beyond 'start'.
 */
int _ismbstrail(const unsigned char* start, const unsigned char* str)
{
  if(str <= start)
    return 0;

  /* Must not be a lead, and must be preceded by one */
  return !_ismbslead(start, str) && MSVCRT_isleadbyte(str[-1]);
}
810 /*********************************************************************
813 unsigned char* _mbsset(unsigned char* str, unsigned int c)
815 unsigned char* ret = str;
817 if(MSVCRT___mb_cur_max == 1 || c < 256)
818 return _strset(str, c); /* ASCII CP or SB char */
820 c &= 0xffff; /* Strip high bits */
822 while(str[0] && str[1])
828 str[0] = '\0'; /* FIXME: OK to shorten? */
833 /*********************************************************************
834 * _mbsnbset(MSVCRT.@)
836 unsigned char* _mbsnbset(unsigned char *str, unsigned int c, MSVCRT_size_t len)
838 unsigned char *ret = str;
843 if(MSVCRT___mb_cur_max == 1 || c < 256)
844 return _strnset(str, c, len); /* ASCII CP or SB char */
846 c &= 0xffff; /* Strip high bits */
848 while(str[0] && str[1] && (len > 1))
856 /* as per msdn pad with a blank character */
863 /*********************************************************************
866 unsigned char* _mbsnset(unsigned char* str, unsigned int c, MSVCRT_size_t len)
868 unsigned char *ret = str;
873 if(MSVCRT___mb_cur_max == 1 || c < 256)
874 return _strnset(str, c, len); /* ASCII CP or SB char */
876 c &= 0xffff; /* Strip high bits */
878 while(str[0] && str[1] && len--)
884 str[0] = '\0'; /* FIXME: OK to shorten? */
889 /*********************************************************************
890 * _mbsnccnt(MSVCRT.@)
891 * 'c' is for 'character'.
893 MSVCRT_size_t _mbsnccnt(const unsigned char* str, MSVCRT_size_t len)
896 if(MSVCRT___mb_cur_max > 1)
899 while(*str && len-- > 0)
901 if(MSVCRT_isleadbyte(*str))
914 return min(ret, len); /* ASCII CP */
917 /*********************************************************************
918 * _mbsnbcnt(MSVCRT.@)
919 * 'b' is for byte count.
921 MSVCRT_size_t _mbsnbcnt(const unsigned char* str, MSVCRT_size_t len)
924 if(MSVCRT___mb_cur_max > 1)
926 const unsigned char* xstr = str;
927 while(*xstr && len-- > 0)
929 if (MSVCRT_isleadbyte(*xstr++))
935 return min(ret, len); /* ASCII CP */
939 /*********************************************************************
940 * _mbsnbcat(MSVCRT.@)
942 unsigned char* _mbsnbcat(unsigned char* dst, const unsigned char* src, MSVCRT_size_t len)
944 if(MSVCRT___mb_cur_max > 1)
948 if (MSVCRT_isleadbyte(*dst++)) {
952 /* as per msdn overwrite the lead byte in front of '\0' */
958 while (*src && len--) *dst++ = *src++;
962 return strncat(dst, src, len); /* ASCII CP */
966 /*********************************************************************
969 unsigned char* _mbsncat(unsigned char* dst, const unsigned char* src, MSVCRT_size_t len)
971 if(MSVCRT___mb_cur_max > 1)
976 if (MSVCRT_isleadbyte(*dst++))
979 while (*src && len--)
982 if(MSVCRT_isleadbyte(*src++))
988 return strncat(dst, src, len); /* ASCII CP */
992 /*********************************************************************
995 unsigned char* _mbslwr(unsigned char* s)
999 if (MSVCRT___mb_cur_max > 1)
1005 c = _mbctolower(_mbsnextc(s));
1006 /* Note that I assume that the size of the character is unchanged */
1020 /*********************************************************************
1023 unsigned char* _mbsupr(unsigned char* s)
1027 if (MSVCRT___mb_cur_max > 1)
1033 c = _mbctoupper(_mbsnextc(s));
1034 /* Note that I assume that the size of the character is unchanged */
1048 /*********************************************************************
1049 * _mbsspn (MSVCRT.@)
1051 MSVCRT_size_t _mbsspn(const unsigned char* string, const unsigned char* set)
1053 const unsigned char *p, *q;
1055 for (p = string; *p; p++)
1057 if (MSVCRT_isleadbyte(*p))
1059 for (q = set; *q; q++)
1063 if ((*p == *q) && (p[1] == q[1]))
1071 for (q = set; *q; q++)
1078 /*********************************************************************
1079 * _mbscspn(MSVCRT.@)
1081 MSVCRT_size_t _mbscspn(const unsigned char* str, const unsigned char* cmp)
1083 if (MSVCRT___mb_cur_max > 1)
1084 FIXME("don't handle double character case\n");
1085 return strcspn(str, cmp);
1088 /*********************************************************************
1089 * _mbsrev (MSVCRT.@)
1091 unsigned char* _mbsrev(unsigned char* str)
1093 int i, len = _mbslen(str);
1094 unsigned char *p, *temp=MSVCRT_malloc(len*2);
1099 /* unpack multibyte string to temp buffer */
1101 for(i=0; i<len; i++)
1103 if (MSVCRT_isleadbyte(*p))
1115 /* repack it in the reverse order */
1117 for(i=len-1; i>=0; i--)
1119 if(MSVCRT_isleadbyte(temp[i*2]))
1135 /*********************************************************************
1136 * _mbspbrk (MSVCRT.@)
1138 unsigned char* _mbspbrk(const unsigned char* str, const unsigned char* accept)
1140 const unsigned char* p;
1144 for(p = accept; *p; p += (MSVCRT_isleadbyte(*p)?2:1) )
1147 if( !MSVCRT_isleadbyte(*p) || ( *(p+1) == *(str+1) ) )
1148 return (unsigned char*)str;
1150 str += (MSVCRT_isleadbyte(*str)?2:1);