2 * msvcrt.dll mbcs functions
4 * Copyright 1999 Alexandre Julliard
5 * Copyright 2000 Jon Griffths
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 * Not currently binary compatible with win32. MSVCRT_mbctype must be
23 * populated correctly and the ismb* functions should reference it.
28 #include "msvcrt/mbctype.h"
29 #include "msvcrt/mbstring.h"
30 #include "msvcrt/stdlib.h"
31 #include "msvcrt/string.h"
32 #include "msvcrt/wctype.h"
34 #include "wine/unicode.h"
35 #include "wine/debug.h"
37 WINE_DEFAULT_DEBUG_CHANNEL(msvcrt);
/* Multibyte character-type table; __p__mbctype() hands out its address.
 * NOTE(review): the note at the top of this file says the table still has
 * to be populated correctly and that the ismb* functions do not consult
 * it yet. */
39 unsigned char MSVCRT_mbctype[257];
/* Exposed through __p___mb_cur_max(). Starts out as 1; the functions in
 * this file take their multibyte code path only when it is greater than 1. */
40 int MSVCRT___mb_cur_max = 1;
42 static MSVCRT_wchar_t msvcrt_mbc_to_wc(unsigned int ch)
52 mbch[0] = (ch >> 8) & 0xff;
56 if (!MultiByteToWideChar(MSVCRT_current_lc_all_cp, 0, mbch, n_chars, &chW, 1))
58 WARN("MultiByteToWideChar failed on %x\n", ch);
64 /*********************************************************************
65 * __p__mbctype (MSVCRT.@)
67 unsigned char* __p__mbctype(void)
69 return MSVCRT_mbctype;
72 /*********************************************************************
73 * __p___mb_cur_max(MSVCRT.@)
75 int* __p___mb_cur_max(void)
77 return &MSVCRT___mb_cur_max;
80 /*********************************************************************
83 unsigned int _mbsnextc(const unsigned char* str)
85 if(MSVCRT___mb_cur_max > 1 && MSVCRT_isleadbyte(*str))
86 return *str << 8 | str[1];
87 return *str; /* ASCII CP or SB char */
90 /*********************************************************************
91 * _mbctolower(MSVCRT.@)
93 unsigned int _mbctolower(unsigned int c)
95 if (MSVCRT_isleadbyte(c))
97 FIXME("Handle MBC chars\n");
100 return tolower(c); /* ASCII CP or SB char */
103 /*********************************************************************
104 * _mbctoupper(MSVCRT.@)
106 unsigned int _mbctoupper(unsigned int c)
108 if (MSVCRT_isleadbyte(c))
110 FIXME("Handle MBC chars\n");
113 return toupper(c); /* ASCII CP or SB char */
116 /*********************************************************************
119 unsigned char* _mbsdec(const unsigned char* start, const unsigned char* cur)
121 if(MSVCRT___mb_cur_max > 1)
122 return (char *)(_ismbstrail(start,cur-1) ? cur - 2 : cur -1);
124 return (char *)cur - 1; /* ASCII CP or SB char */
127 /*********************************************************************
130 unsigned char* _mbsinc(const unsigned char* str)
132 if(MSVCRT___mb_cur_max > 1 && MSVCRT_isleadbyte(*str))
133 return (unsigned char*)str + 2; /* MB char */
135 return (unsigned char*)str + 1; /* ASCII CP or SB char */
138 /*********************************************************************
141 unsigned char* _mbsninc(const unsigned char* str, MSVCRT_size_t num)
145 if(MSVCRT___mb_cur_max > 1)
149 return (unsigned char*)str;
151 return (unsigned char*)str + num; /* ASCII CP */
/*********************************************************************
 *		_mbclen(MSVCRT.@)
 *
 * Length in bytes of the character at the start of str.
 *
 * RETURNS
 *  2 if *str is a lead byte, 1 otherwise.
 */
unsigned int _mbclen(const unsigned char* str)
{
  if (MSVCRT_isleadbyte(*str))
    return 2;

  return 1;
}
162 /*********************************************************************
165 int MSVCRT_mblen(const char* str, MSVCRT_size_t size)
167 if (str && *str && size)
169 if(MSVCRT___mb_cur_max == 1)
170 return 1; /* ASCII CP */
172 return !MSVCRT_isleadbyte(*str) ? 1 : (size>1 ? 2 : -1);
177 /*********************************************************************
180 MSVCRT_size_t _mbslen(const unsigned char* str)
182 if(MSVCRT___mb_cur_max > 1)
184 MSVCRT_size_t len = 0;
187 str += MSVCRT_isleadbyte(*str) ? 2 : 1;
192 return strlen(str); /* ASCII CP */
195 /*********************************************************************
196 * _mbstrlen(MSVCRT.@)
198 MSVCRT_size_t _mbstrlen(const char* str)
200 if(MSVCRT___mb_cur_max > 1)
202 MSVCRT_size_t len = 0;
205 /* FIXME: According to the documentation we are supposed to test for
206 * multi-byte character validity. Whatever that means
208 str += MSVCRT_isleadbyte(*str) ? 2 : 1;
213 return strlen(str); /* ASCII CP */
216 /*********************************************************************
219 void _mbccpy(unsigned char* dest, const unsigned char* src)
222 if(MSVCRT___mb_cur_max > 1 && MSVCRT_isleadbyte(*src))
223 *dest = *++src; /* MB char */
225 ERR("failure.. is this ok?\n");
228 /*********************************************************************
231 unsigned char* _mbsncpy(unsigned char* dst, const unsigned char* src, MSVCRT_size_t n)
235 if(MSVCRT___mb_cur_max > 1)
237 unsigned char* ret = dst;
241 if (MSVCRT_isleadbyte(*src++))
248 return strncpy(dst, src, n); /* ASCII CP */
251 /*********************************************************************
252 * _mbsnbcpy(MSVCRT.@)
254 unsigned char* _mbsnbcpy(unsigned char* dst, const unsigned char* src, MSVCRT_size_t n)
258 if(MSVCRT___mb_cur_max > 1)
260 unsigned char* ret = dst;
261 while (*src && (n-- > 1))
264 if (MSVCRT_isleadbyte(*src++))
270 if (*src && n && !MSVCRT_isleadbyte(*src))
272 /* If the last character is a multi-byte character then
273 * we cannot copy it since we have only one byte left
282 return strncpy(dst, src, n); /* ASCII CP */
285 /*********************************************************************
288 int _mbscmp(const unsigned char* str, const unsigned char* cmp)
290 if(MSVCRT___mb_cur_max > 1)
292 unsigned int strc, cmpc;
295 return *cmp ? -1 : 0;
298 strc = _mbsnextc(str);
299 cmpc = _mbsnextc(cmp);
301 return strc < cmpc ? -1 : 1;
302 str +=(strc > 255) ? 2 : 1;
303 cmp +=(strc > 255) ? 2 : 1; /* equal, use same increment */
306 return strcmp(str, cmp); /* ASCII CP */
309 /*********************************************************************
310 * _mbsicoll(MSVCRT.@)
311 * FIXME: handle locales.
313 int _mbsicoll(const unsigned char* str, const unsigned char* cmp)
315 if(MSVCRT___mb_cur_max > 1)
317 unsigned int strc, cmpc;
320 return *cmp ? -1 : 0;
323 strc = _mbctolower(_mbsnextc(str));
324 cmpc = _mbctolower(_mbsnextc(cmp));
326 return strc < cmpc ? -1 : 1;
327 str +=(strc > 255) ? 2 : 1;
328 cmp +=(strc > 255) ? 2 : 1; /* equal, use same increment */
331 return strcasecmp(str, cmp); /* ASCII CP */
335 /*********************************************************************
338 int _mbsicmp(const unsigned char* str, const unsigned char* cmp)
340 if(MSVCRT___mb_cur_max > 1)
342 unsigned int strc, cmpc;
345 return *cmp ? -1 : 0;
348 strc = _mbctolower(_mbsnextc(str));
349 cmpc = _mbctolower(_mbsnextc(cmp));
351 return strc < cmpc ? -1 : 1;
352 str +=(strc > 255) ? 2 : 1;
353 cmp +=(strc > 255) ? 2 : 1; /* equal, use same increment */
356 return strcasecmp(str, cmp); /* ASCII CP */
359 /*********************************************************************
362 int _mbsncmp(const unsigned char* str, const unsigned char* cmp, MSVCRT_size_t len)
367 if(MSVCRT___mb_cur_max > 1)
369 unsigned int strc, cmpc;
374 return *cmp ? -1 : 0;
377 strc = _mbsnextc(str);
378 cmpc = _mbsnextc(cmp);
380 return strc < cmpc ? -1 : 1;
381 inc=(strc > 255) ? 2 : 1; /* Equal, use same increment */
385 return 0; /* Matched len chars */
387 return strncmp(str, cmp, len); /* ASCII CP */
390 /*********************************************************************
391 * _mbsnbcmp(MSVCRT.@)
393 int _mbsnbcmp(const unsigned char* str, const unsigned char* cmp, MSVCRT_size_t len)
397 if(MSVCRT___mb_cur_max > 1)
399 unsigned int strc, cmpc;
404 return *cmp ? -1 : 0;
407 if (MSVCRT_isleadbyte(*str))
409 strc=(len>=2)?_mbsnextc(str):0;
417 if (MSVCRT_isleadbyte(*cmp))
418 cmpc=(len>=2)?_mbsnextc(cmp):0;
422 return strc < cmpc ? -1 : 1;
427 return 0; /* Matched len chars */
428 FIXME("%s %s %d\n",str,cmp,len);
430 return strncmp(str,cmp,len);
433 /*********************************************************************
434 * _mbsnicmp(MSVCRT.@)
436 * Compare two multibyte strings case insensitively to 'len' characters.
438 int _mbsnicmp(const unsigned char* str, const unsigned char* cmp, MSVCRT_size_t len)
440 /* FIXME: No tolower() for mb strings yet */
441 if(MSVCRT___mb_cur_max > 1)
443 unsigned int strc, cmpc;
447 return *cmp ? -1 : 0;
450 strc = _mbctolower(_mbsnextc(str));
451 cmpc = _mbctolower(_mbsnextc(cmp));
453 return strc < cmpc ? -1 : 1;
454 str +=(strc > 255) ? 2 : 1;
455 cmp +=(strc > 255) ? 2 : 1; /* Equal, use same increment */
457 return 0; /* Matched len chars */
459 return strncasecmp(str, cmp, len); /* ASCII CP */
462 /*********************************************************************
463 * _mbsnbicmp(MSVCRT.@)
465 int _mbsnbicmp(const unsigned char* str, const unsigned char* cmp, MSVCRT_size_t len)
469 if(MSVCRT___mb_cur_max > 1)
471 unsigned int strc, cmpc;
476 return *cmp ? -1 : 0;
479 if (MSVCRT_isleadbyte(*str))
481 strc=(len>=2)?_mbsnextc(str):0;
489 if (MSVCRT_isleadbyte(*cmp))
490 cmpc=(len>=2)?_mbsnextc(cmp):0;
493 strc = _mbctolower(strc);
494 cmpc = _mbctolower(cmpc);
496 return strc < cmpc ? -1 : 1;
501 return 0; /* Matched len bytes */
502 FIXME("%s %s %d\n",str,cmp,len);
504 return strncmp(str,cmp,len);
507 /*********************************************************************
510 * Find a multibyte character in a multibyte string.
512 unsigned char* _mbschr(const unsigned char* s, unsigned int x)
514 if(MSVCRT___mb_cur_max > 1)
521 return (unsigned char*)s;
524 s += c > 255 ? 2 : 1;
527 return strchr(s, x); /* ASCII CP */
530 /*********************************************************************
533 unsigned char* _mbsrchr(const unsigned char* s, unsigned int x)
535 if(MSVCRT___mb_cur_max > 1)
538 unsigned char* match=NULL;
544 match=(unsigned char*)s;
547 s +=(c > 255) ? 2 : 1;
553 /*********************************************************************
556 int MSVCRT_mbtowc(MSVCRT_wchar_t *dst, const char* str, MSVCRT_size_t n)
560 if(!MultiByteToWideChar(CP_ACP, 0, str, n, dst, 1))
562 /* return the number of bytes from src that have been used */
565 if(n >= 2 && MSVCRT_isleadbyte(*str) && str[1])
570 /*********************************************************************
571 * _mbbtombc(MSVCRT.@)
573 unsigned int _mbbtombc(unsigned int c)
575 if(MSVCRT___mb_cur_max > 1 &&
576 ((c >= 0x20 && c <=0x7e) ||(c >= 0xa1 && c <= 0xdf)))
578 /* FIXME: I can't get this function to return anything
579 * different to what I pass it...
582 return c; /* ASCII CP or no MB char */
585 /*********************************************************************
586 * _ismbbkana(MSVCRT.@)
588 int _ismbbkana(unsigned int c)
590 /* FIXME: use lc_ctype when supported, not lc_all */
591 if(MSVCRT_current_lc_all_cp == 932)
593 /* Japanese/Katakana, CP 932 */
594 return (c >= 0xa1 && c <= 0xdf);
599 /*********************************************************************
600 * _ismbcdigit(MSVCRT.@)
602 int _ismbcdigit(unsigned int ch)
604 MSVCRT_wchar_t wch = msvcrt_mbc_to_wc( ch );
605 return (get_char_typeW( wch ) & C1_DIGIT);
608 /*********************************************************************
609 * _ismbcgraph(MSVCRT.@)
611 int _ismbcgraph(unsigned int ch)
613 MSVCRT_wchar_t wch = msvcrt_mbc_to_wc( ch );
614 return (get_char_typeW( wch ) & (C1_UPPER | C1_LOWER | C1_DIGIT | C1_PUNCT | C1_ALPHA));
617 /*********************************************************************
618 * _ismbcalpha (MSVCRT.@)
620 int _ismbcalpha(unsigned int ch)
622 MSVCRT_wchar_t wch = msvcrt_mbc_to_wc( ch );
623 return (get_char_typeW( wch ) & C1_ALPHA);
626 /*********************************************************************
627 * _ismbclower (MSVCRT.@)
629 int _ismbclower(unsigned int ch)
631 MSVCRT_wchar_t wch = msvcrt_mbc_to_wc( ch );
632 return (get_char_typeW( wch ) & C1_UPPER);
635 /*********************************************************************
636 * _ismbcupper (MSVCRT.@)
638 int _ismbcupper(unsigned int ch)
640 MSVCRT_wchar_t wch = msvcrt_mbc_to_wc( ch );
641 return (get_char_typeW( wch ) & C1_LOWER);
644 /*********************************************************************
645 * _ismbcsymbol(MSVCRT.@)
647 int _ismbcsymbol(unsigned int ch)
649 MSVCRT_wchar_t wch = msvcrt_mbc_to_wc( ch );
651 if (!GetStringTypeW(CT_CTYPE3, &wch, 1, &ctype))
653 WARN("GetStringTypeW failed on %x\n", ch);
656 return ((ctype & C3_SYMBOL) != 0);
659 /*********************************************************************
660 * _ismbcalnum (MSVCRT.@)
662 int _ismbcalnum(unsigned int ch)
664 MSVCRT_wchar_t wch = msvcrt_mbc_to_wc( ch );
665 return (get_char_typeW( wch ) & (C1_ALPHA | C1_DIGIT));
668 /*********************************************************************
669 * _ismbcspace (MSVCRT.@)
671 int _ismbcspace(unsigned int ch)
673 MSVCRT_wchar_t wch = msvcrt_mbc_to_wc( ch );
674 return (get_char_typeW( wch ) & C1_SPACE);
677 /*********************************************************************
678 * _ismbcprint (MSVCRT.@)
680 int _ismbcprint(unsigned int ch)
682 MSVCRT_wchar_t wch = msvcrt_mbc_to_wc( ch );
683 return (get_char_typeW( wch ) & (C1_UPPER | C1_LOWER | C1_DIGIT | C1_PUNCT | C1_ALPHA | C1_SPACE));
686 /*********************************************************************
687 * _ismbcpunct(MSVCRT.@)
689 int _ismbcpunct(unsigned int ch)
691 MSVCRT_wchar_t wch = msvcrt_mbc_to_wc( ch );
692 return (get_char_typeW( wch ) & C1_PUNCT);
695 /*********************************************************************
696 * _ismbchira(MSVCRT.@)
698 int _ismbchira(unsigned int c)
700 /* FIXME: use lc_ctype when supported, not lc_all */
701 if(MSVCRT_current_lc_all_cp == 932)
703 /* Japanese/Hiragana, CP 932 */
704 return (c >= 0x829f && c <= 0x82f1);
709 /*********************************************************************
710 * _ismbckata(MSVCRT.@)
712 int _ismbckata(unsigned int c)
714 /* FIXME: use lc_ctype when supported, not lc_all */
715 if(MSVCRT_current_lc_all_cp == 932)
718 return _ismbbkana(c);
719 /* Japanese/Katakana, CP 932 */
720 return (c >= 0x8340 && c <= 0x8396 && c != 0x837f);
725 /*********************************************************************
726 * _ismbblead(MSVCRT.@)
728 int _ismbblead(unsigned int c)
730 /* FIXME: should reference MSVCRT_mbctype */
731 return MSVCRT___mb_cur_max > 1 && MSVCRT_isleadbyte(c);
/*********************************************************************
 *		_ismbbtrail(MSVCRT.@)
 *
 * Implemented as the plain negation of _ismbblead(), so every byte that
 * is not reported as a lead byte is reported as a trail byte.
 *
 * RETURNS
 *  1 if _ismbblead(c) is zero, 0 otherwise.
 */
int _ismbbtrail(unsigned int c)
{
  /* FIXME: should reference MSVCRT_mbctype */
  return _ismbblead(c) ? 0 : 1;
}
744 /*********************************************************************
745 * _ismbslead(MSVCRT.@)
747 int _ismbslead(const unsigned char* start, const unsigned char* str)
749 /* Lead bytes can also be trail bytes if caller messed up
750 * iterating through the string...
752 if(MSVCRT___mb_cur_max > 1)
755 start += MSVCRT_isleadbyte(*str) ? 2 : 1;
758 return MSVCRT_isleadbyte(*str);
760 return 0; /* Must have been a trail, we skipped it */
/*********************************************************************
 *		_ismbstrail(MSVCRT.@)
 *
 * Test whether the byte at str is the trail byte of a double-byte
 * character in the string beginning at start.
 *
 * PARAMS
 *  start [I] First byte of the string
 *  str   [I] Byte within the string to test
 *
 * RETURNS
 *  1 if str is not a lead byte according to _ismbslead() and the byte
 *  before it is a lead byte according to MSVCRT_isleadbyte(); 0 otherwise,
 *  including when str is not past start.
 */
int _ismbstrail(const unsigned char* start, const unsigned char* str)
{
  /* Nothing precedes the first byte of the string, so it cannot be a
   * trail byte; returning here also keeps str[-1] inside the buffer. */
  if (str <= start)
    return 0;

  /* Must not be a lead, and must be preceded by one */
  return !_ismbslead(start, str) && MSVCRT_isleadbyte(str[-1]);
}
772 /*********************************************************************
775 unsigned char* _mbsset(unsigned char* str, unsigned int c)
777 unsigned char* ret = str;
779 if(MSVCRT___mb_cur_max == 1 || c < 256)
780 return _strset(str, c); /* ASCII CP or SB char */
782 c &= 0xffff; /* Strip high bits */
784 while(str[0] && str[1])
790 str[0] = '\0'; /* FIXME: OK to shorten? */
795 /*********************************************************************
798 unsigned char* _mbsnset(unsigned char* str, unsigned int c, MSVCRT_size_t len)
800 unsigned char *ret = str;
805 if(MSVCRT___mb_cur_max == 1 || c < 256)
806 return _strnset(str, c, len); /* ASCII CP or SB char */
808 c &= 0xffff; /* Strip high bits */
810 while(str[0] && str[1] && len--)
816 str[0] = '\0'; /* FIXME: OK to shorten? */
821 /*********************************************************************
822 * _mbsnccnt(MSVCRT.@)
823 * 'c' is for 'character'.
825 MSVCRT_size_t _mbsnccnt(const unsigned char* str, MSVCRT_size_t len)
828 if(MSVCRT___mb_cur_max > 1)
831 while(*str && len-- > 0)
833 if(MSVCRT_isleadbyte(*str))
846 return min(ret, len); /* ASCII CP */
849 /*********************************************************************
850 * _mbsnbcnt(MSVCRT.@)
851 * 'b' is for byte count.
853 MSVCRT_size_t _mbsnbcnt(const unsigned char* str, MSVCRT_size_t len)
856 if(MSVCRT___mb_cur_max > 1)
858 const unsigned char* xstr = str;
859 while(*xstr && len-- > 0)
861 if (MSVCRT_isleadbyte(*xstr++))
867 return min(ret, len); /* ASCII CP */
871 /*********************************************************************
874 unsigned char* _mbsncat(unsigned char* dst, const unsigned char* src, MSVCRT_size_t len)
876 if(MSVCRT___mb_cur_max > 1)
881 if (MSVCRT_isleadbyte(*dst++))
884 while (*src && len--)
887 if(MSVCRT_isleadbyte(*src++))
893 return strncat(dst, src, len); /* ASCII CP */
897 /*********************************************************************
900 unsigned char* _mbslwr(unsigned char* s)
904 if (MSVCRT___mb_cur_max > 1)
910 c = _mbctolower(_mbsnextc(s));
911 /* Note that I assume that the size of the character is unchanged */
925 /*********************************************************************
928 unsigned char* _mbsupr(unsigned char* s)
932 if (MSVCRT___mb_cur_max > 1)
938 c = _mbctoupper(_mbsnextc(s));
939 /* Note that I assume that the size of the character is unchanged */
953 /*********************************************************************
956 MSVCRT_size_t _mbsspn(const unsigned char* string, const unsigned char* set)
958 const unsigned char *p, *q;
960 for (p = string; *p; p++)
962 if (MSVCRT_isleadbyte(*p))
964 for (q = set; *q; q++)
968 if ((*p == *q) && (p[1] == q[1]))
976 for (q = set; *q; q++)
983 /*********************************************************************
986 MSVCRT_size_t _mbscspn(const unsigned char* str, const unsigned char* cmp)
988 if (MSVCRT___mb_cur_max > 1)
989 FIXME("don't handle double character case\n");
990 return strcspn(str, cmp);
993 /*********************************************************************
996 unsigned char* _mbsrev(unsigned char* str)
998 int i, len = _mbslen(str);
999 unsigned char *p, *temp=MSVCRT_malloc(len*2);
1004 /* unpack multibyte string to temp buffer */
1006 for(i=0; i<len; i++)
1008 if (MSVCRT_isleadbyte(*p))
1020 /* repack it in the reverse order */
1022 for(i=len-1; i>=0; i--)
1024 if(MSVCRT_isleadbyte(temp[i*2]))
1040 /*********************************************************************
1041 * _mbspbrk (MSVCRT.@)
1043 unsigned char* _mbspbrk(const unsigned char* str, const unsigned char* accept)
1045 const unsigned char* p;
1049 for(p = accept; *p; p += (MSVCRT_isleadbyte(*p)?2:1) )
1052 if( !MSVCRT_isleadbyte(*p) || ( *(p+1) == *(str+1) ) )
1053 return (unsigned char*)str;
1055 str += (MSVCRT_isleadbyte(*str)?2:1);