2 * msvcrt.dll mbcs functions
4 * Copyright 1999 Alexandre Julliard
5 * Copyright 2000 Jon Griffths
8 * Not currently binary compatible with win32. MSVCRT_mbctype must be
9 * populated correctly and the ismb* functions should reference it.
14 #include "msvcrt/mbctype.h"
15 #include "msvcrt/mbstring.h"
16 #include "msvcrt/stdlib.h"
17 #include "msvcrt/string.h"
18 #include "msvcrt/wctype.h"
21 #include "wine/debug.h"
23 WINE_DEFAULT_DEBUG_CHANNEL(msvcrt);
/* Global multibyte state handed out by the __p_ accessors below:
 * a 257-byte table and the current maximum number of bytes per
 * character, which starts out as 1. The note at the top of the file
 * says the table still has to be populated correctly and that the
 * ismb* functions should consult it. */
25 unsigned char MSVCRT_mbctype[257];
26 int MSVCRT___mb_cur_max = 1;
28 /*********************************************************************
29 * __p__mbctype (MSVCRT.@)
/* Returns the address of the MSVCRT_mbctype table. */
31 unsigned char* __p__mbctype(void)
33 return MSVCRT_mbctype;
36 /*********************************************************************
37 * __p___mb_cur_max(MSVCRT.@)
/* Returns the address of the MSVCRT___mb_cur_max variable. */
39 int* __p___mb_cur_max(void)
41 return &MSVCRT___mb_cur_max;
44 /*********************************************************************
/* Returns the character value at the start of str. When
 * MSVCRT___mb_cur_max > 1 and the first byte is a lead byte, the result
 * is (first byte << 8) | second byte; otherwise it is the single byte
 * *str. The pointer itself is not advanced. */
47 unsigned int _mbsnextc(const unsigned char* str)
49 if(MSVCRT___mb_cur_max > 1 && MSVCRT_isleadbyte(*str))
50 return *str << 8 | str[1];
51 return *str; /* ASCII CP or SB char */
54 /*********************************************************************
55 * _mbctolower(MSVCRT.@)
/* Lower-cases the character value c. Values for which
 * MSVCRT_isleadbyte() is true only reach a FIXME (multibyte case
 * conversion is not implemented); all other values go through tolower().
 * NOTE(review): what the lead-byte branch returns is not visible in
 * these lines. */
57 unsigned int _mbctolower(unsigned int c)
59 if (MSVCRT_isleadbyte(c))
61 FIXME("Handle MBC chars\n");
64 return tolower(c); /* ASCII CP or SB char */
67 /*********************************************************************
68 * _mbctoupper(MSVCRT.@)
/* Upper-cases the character value c. Values for which
 * MSVCRT_isleadbyte() is true only reach a FIXME (multibyte case
 * conversion is not implemented); all other values go through toupper().
 * NOTE(review): what the lead-byte branch returns is not visible in
 * these lines. */
70 unsigned int _mbctoupper(unsigned int c)
72 if (MSVCRT_isleadbyte(c))
74 FIXME("Handle MBC chars\n");
77 return toupper(c); /* ASCII CP or SB char */
80 /*********************************************************************
/* Steps back one character from cur inside the string that begins at
 * start. On a multibyte code page the result is cur - 2 when
 * _ismbstrail() reports cur - 1 as a trail byte, else cur - 1; on a
 * single-byte code page it is always cur - 1.
 * NOTE(review): both returns cast to (char *) although the function
 * returns unsigned char*. No check for cur <= start is visible, so
 * cur - 1 can point before the string; the Microsoft documentation
 * describes a NULL return for that case - confirm and align. */
83 unsigned char* _mbsdec(const unsigned char* start, const unsigned char* cur)
85 if(MSVCRT___mb_cur_max > 1)
86 return (char *)(_ismbstrail(start,cur-1) ? cur - 2 : cur -1);
88 return (char *)cur - 1; /* ASCII CP or SB char */
91 /*********************************************************************
/* Returns a pointer to the character after the one at str: two bytes
 * further on when the code page is multibyte and *str is a lead byte,
 * one byte further on otherwise. The const qualifier is cast away on
 * the returned pointer. */
94 unsigned char* _mbsinc(const unsigned char* str)
96 if(MSVCRT___mb_cur_max > 1 && MSVCRT_isleadbyte(*str))
97 return (unsigned char*)str + 2; /* MB char */
99 return (unsigned char*)str + 1; /* ASCII CP or SB char */
102 /*********************************************************************
/* Advances str by num characters. On a single-byte code page the
 * result is simply str + num.
 * NOTE(review): on a multibyte code page the function returns str, and
 * the stepping that must precede that return is not visible in these
 * lines; any argument validation is likewise not shown. */
105 unsigned char* _mbsninc(const unsigned char* str, MSVCRT_size_t num)
109 if(MSVCRT___mb_cur_max > 1)
113 return (unsigned char*)str;
115 return (unsigned char*)str + num; /* ASCII CP */
118 /*********************************************************************
/* Returns the byte length of the character at str: 2 when its first
 * byte is a lead byte, 1 otherwise.
 * NOTE(review): unlike _mbsnextc() and _mbsinc(), this does not test
 * MSVCRT___mb_cur_max before consulting MSVCRT_isleadbyte() - confirm
 * that the difference is intended. */
121 unsigned int _mbclen(const unsigned char* str)
123 return MSVCRT_isleadbyte(*str) ? 2 : 1;
126 /*********************************************************************
/* Byte length of the multibyte character at str, given that at most
 * size bytes may be examined. For a non-NULL, non-empty str and a
 * non-zero size: 1 on a single-byte code page or when *str is not a
 * lead byte; for a lead byte, 2 if size > 1 and -1 otherwise.
 * NOTE(review): the value returned when the first test fails is not
 * visible in these lines. */
129 int MSVCRT_mblen(const char* str, MSVCRT_size_t size)
131 if (str && *str && size)
133 if(MSVCRT___mb_cur_max == 1)
134 return 1; /* ASCII CP */
136 return !MSVCRT_isleadbyte(*str) ? 1 : (size>1 ? 2 : -1);
141 /*********************************************************************
/* Length of str in characters. On a single-byte code page this is
 * strlen(str). On a multibyte code page the string is walked two bytes
 * at a time for lead bytes and one byte otherwise, with the count
 * presumably accumulated in len.
 * NOTE(review): the loop header, the increment of len and the return
 * of the multibyte branch are not visible in these lines. */
144 MSVCRT_size_t _mbslen(const unsigned char* str)
146 if(MSVCRT___mb_cur_max > 1)
148 MSVCRT_size_t len = 0;
151 str += MSVCRT_isleadbyte(*str) ? 2 : 1;
156 return strlen(str); /* ASCII CP */
159 /*********************************************************************
160 * _mbstrlen(MSVCRT.@)
/* Length of the plain-char string str in characters. On a single-byte
 * code page this is strlen(str); on a multibyte code page the string is
 * walked two bytes per lead byte and one byte otherwise. As the FIXME
 * inside says, no validation of the multibyte sequences is done.
 * NOTE(review): the loop header, the increment of len and the return
 * of the multibyte branch are not visible in these lines. */
162 MSVCRT_size_t _mbstrlen(const char* str)
164 if(MSVCRT___mb_cur_max > 1)
166 MSVCRT_size_t len = 0;
169 /* FIXME: According to the documentation we are supposed to test for
170 * multi-byte character validity. Whatever that means
172 str += MSVCRT_isleadbyte(*str) ? 2 : 1;
177 return strlen(str); /* ASCII CP */
180 /*********************************************************************
/* Copies one character from src to dest. For a lead byte on a
 * multibyte code page the byte following *src is stored through dest.
 * NOTE(review): how the first byte is stored, and which condition
 * guards the ERR() call, cannot be seen from these lines. */
183 void _mbccpy(unsigned char* dest, const unsigned char* src)
186 if(MSVCRT___mb_cur_max > 1 && MSVCRT_isleadbyte(*src))
187 *dest = *++src; /* MB char */
189 ERR("failure.. is this ok?\n");
192 /*********************************************************************
/* Bounded copy of src into dst with limit n. On a single-byte code
 * page this is strncpy(dst, src, n). The multibyte branch remembers
 * the original dst in ret and tests each source byte for being a lead
 * byte while advancing src.
 * NOTE(review): the copy loop, its termination and the return value of
 * the multibyte branch are not visible in these lines. */
195 unsigned char* _mbsncpy(unsigned char* dst, const unsigned char* src, MSVCRT_size_t n)
199 if(MSVCRT___mb_cur_max > 1)
201 unsigned char* ret = dst;
205 if (MSVCRT_isleadbyte(*src++))
212 return strncpy(dst, src, n); /* ASCII CP */
215 /*********************************************************************
216 * _mbsnbcpy(MSVCRT.@)
/* Bounded copy of src into dst with limit n. On a single-byte code
 * page this is strncpy(dst, src, n). On a multibyte code page the main
 * loop runs while src has data and more than one unit of n remains;
 * afterwards a last byte is considered only if it is not a lead byte,
 * because - per the comment inside - a two-byte character cannot be
 * copied when only one byte is left.
 * NOTE(review): the statements that perform the copies and the return
 * of the multibyte branch are not visible in these lines. */
218 unsigned char* _mbsnbcpy(unsigned char* dst, const unsigned char* src, MSVCRT_size_t n)
222 if(MSVCRT___mb_cur_max > 1)
224 unsigned char* ret = dst;
225 while (*src && (n-- > 1))
228 if (MSVCRT_isleadbyte(*src++))
234 if (*src && n && !MSVCRT_isleadbyte(*src))
236 /* If the last character is a multi-byte character then
237 * we cannot copy it since we have only one byte left
246 return strncpy(dst, src, n); /* ASCII CP */
249 /*********************************************************************
/* Compares str with cmp. On a single-byte code page this is
 * strcmp(str, cmp). On a multibyte code page characters are fetched
 * with _mbsnextc(); the first pair that differs yields -1 or 1, and
 * equal pairs advance both pointers by 2 when the character value
 * exceeds 255 and by 1 otherwise. One visible exit returns -1 if cmp
 * still has data and 0 if it does not.
 * NOTE(review): the loop header and the conditions guarding the early
 * returns are not visible in these lines. */
252 int _mbscmp(const unsigned char* str, const unsigned char* cmp)
254 if(MSVCRT___mb_cur_max > 1)
256 unsigned int strc, cmpc;
259 return *cmp ? -1 : 0;
262 strc = _mbsnextc(str);
263 cmpc = _mbsnextc(cmp);
265 return strc < cmpc ? -1 : 1;
266 str +=(strc > 255) ? 2 : 1;
267 cmp +=(strc > 255) ? 2 : 1; /* equal, use same increment */
270 return strcmp(str, cmp); /* ASCII CP */
273 /*********************************************************************
/* Case-insensitive comparison of str with cmp. On a single-byte code
 * page this is strcasecmp(str, cmp). On a multibyte code page each
 * character is fetched with _mbsnextc() and passed through
 * _mbctolower() before comparing; the first differing pair yields -1
 * or 1, and equal pairs advance both pointers by 2 when the value
 * exceeds 255 and by 1 otherwise.
 * NOTE(review): the loop header and the conditions guarding the early
 * returns are not visible in these lines. */
276 int _mbsicmp(const unsigned char* str, const unsigned char* cmp)
278 if(MSVCRT___mb_cur_max > 1)
280 unsigned int strc, cmpc;
283 return *cmp ? -1 : 0;
286 strc = _mbctolower(_mbsnextc(str));
287 cmpc = _mbctolower(_mbsnextc(cmp));
289 return strc < cmpc ? -1 : 1;
290 str +=(strc > 255) ? 2 : 1;
291 cmp +=(strc > 255) ? 2 : 1; /* equal, use same increment */
294 return strcasecmp(str, cmp); /* ASCII CP */
297 /*********************************************************************
/* Compares str with cmp over at most len units. On a single-byte code
 * page this is strncmp(str, cmp, len). On a multibyte code page
 * characters are fetched with _mbsnextc(); the first differing pair
 * yields -1 or 1, equal pairs share one increment (2 when the value
 * exceeds 255, else 1), and 0 is returned once the limit is matched.
 * NOTE(review): the declaration of inc, the loop header and the way
 * len is consumed are not visible in these lines. */
300 int _mbsncmp(const unsigned char* str, const unsigned char* cmp, MSVCRT_size_t len)
305 if(MSVCRT___mb_cur_max > 1)
307 unsigned int strc, cmpc;
312 return *cmp ? -1 : 0;
315 strc = _mbsnextc(str);
316 cmpc = _mbsnextc(cmp);
318 return strc < cmpc ? -1 : 1;
319 inc=(strc > 255) ? 2 : 1; /* Equal, use same increment */
323 return 0; /* Matched len chars */
325 return strncmp(str, cmp, len); /* ASCII CP */
328 /*********************************************************************
329 * _mbsnbcmp(MSVCRT.@)
/* Compares str with cmp with len as the limit. On a multibyte code
 * page, a lead byte on either side is read as a full two-byte
 * character through _mbsnextc() only when len >= 2; with less than two
 * left the value used is 0. The first differing pair yields -1 or 1
 * and 0 is returned once the limit is matched. The remaining path logs
 * a FIXME and falls back to strncmp(str, cmp, len).
 * NOTE(review): the FIXME prints len, an MSVCRT_size_t, with %d -
 * confirm the type matches the format. The loop header, the non-lead
 * assignments and the pointer/len updates are not visible in these
 * lines. */
331 int _mbsnbcmp(const unsigned char* str, const unsigned char* cmp, MSVCRT_size_t len)
335 if(MSVCRT___mb_cur_max > 1)
337 unsigned int strc, cmpc;
342 return *cmp ? -1 : 0;
345 if (MSVCRT_isleadbyte(*str))
347 strc=(len>=2)?_mbsnextc(str):0;
355 if (MSVCRT_isleadbyte(*cmp))
356 cmpc=(len>=2)?_mbsnextc(cmp):0;
360 return strc < cmpc ? -1 : 1;
365 return 0; /* Matched len chars */
366 FIXME("%s %s %d\n",str,cmp,len);
368 return strncmp(str,cmp,len);
371 /*********************************************************************
372 * _mbsnicmp(MSVCRT.@)
374 * Compare two multibyte strings case insensitively to 'len' characters.
/* Case-insensitive comparison of str with cmp over at most len
 * characters. On a single-byte code page this is
 * strncasecmp(str, cmp, len). On a multibyte code page each character
 * is fetched with _mbsnextc() and folded with _mbctolower() before
 * comparing; the first differing pair yields -1 or 1, equal pairs
 * advance both pointers by 2 when the value exceeds 255 and by 1
 * otherwise, and 0 is returned once len characters have matched.
 * NOTE(review): the loop header and the way len is consumed are not
 * visible in these lines. */
376 int _mbsnicmp(const unsigned char* str, const unsigned char* cmp, MSVCRT_size_t len)
378 /* FIXME: No tolower() for mb strings yet */
379 if(MSVCRT___mb_cur_max > 1)
381 unsigned int strc, cmpc;
385 return *cmp ? -1 : 0;
388 strc = _mbctolower(_mbsnextc(str));
389 cmpc = _mbctolower(_mbsnextc(cmp));
391 return strc < cmpc ? -1 : 1;
392 str +=(strc > 255) ? 2 : 1;
393 cmp +=(strc > 255) ? 2 : 1; /* Equal, use same increment */
395 return 0; /* Matched len chars */
397 return strncasecmp(str, cmp, len); /* ASCII CP */
400 /*********************************************************************
403 * Find a multibyte character in a multibyte string.
/* Searches s for the character x. On a single-byte code page this is
 * strchr(s, x). On a multibyte code page the scan returns the current
 * position on a match and otherwise advances by 2 when the character
 * value c exceeds 255 and by 1 when it does not.
 * NOTE(review): the declaration and fetch of c, the loop header and
 * the not-found return are not visible in these lines. */
405 unsigned char* _mbschr(const unsigned char* s, unsigned int x)
407 if(MSVCRT___mb_cur_max > 1)
414 return (unsigned char*)s;
417 s += c > 255 ? 2 : 1;
420 return strchr(s, x); /* ASCII CP */
423 /*********************************************************************
/* Searches s for the character x, remembering matches in match (NULL
 * until one is found) while advancing by 2 when the character value c
 * exceeds 255 and by 1 otherwise - presumably so that the last
 * occurrence is what gets returned.
 * NOTE(review): the fetch of c, the loop header, every return and the
 * single-byte code page path are not visible in these lines. */
426 unsigned char* _mbsrchr(const unsigned char* s, unsigned int x)
428 if(MSVCRT___mb_cur_max > 1)
431 unsigned char* match=NULL;
437 match=(unsigned char*)s;
440 s +=(c > 255) ? 2 : 1;
446 /*********************************************************************
/* Converts the multibyte character at str into a wide character at
 * dst using MultiByteToWideChar() with CP_ACP. The inner comment says
 * the result is the number of source bytes consumed; a lead byte with
 * a non-zero second byte and n >= 2 is the two-byte case.
 * NOTE(review): the call passes n as the source length but a
 * destination capacity of one WCHAR - confirm how the API behaves when
 * n spans more than one character. The actual return statements are
 * not visible in these lines. */
449 int MSVCRT_mbtowc(WCHAR *dst, const char* str, MSVCRT_size_t n)
453 if(!MultiByteToWideChar(CP_ACP, 0, str, n, dst, 1))
455 /* return the number of bytes from src that have been used */
458 if(n >= 2 && MSVCRT_isleadbyte(*str) && str[1])
463 /*********************************************************************
464 * _mbbtombc(MSVCRT.@)
/* Maps the single-byte value c to a multibyte character value. On a
 * multibyte code page, values in 0x20..0x7e or 0xa1..0xdf enter a
 * branch that carries only a FIXME; the visible return hands c back
 * unchanged.
 * NOTE(review): the body of that branch is not visible in these
 * lines. */
466 unsigned int _mbbtombc(unsigned int c)
468 if(MSVCRT___mb_cur_max > 1 &&
469 ((c >= 0x20 && c <=0x7e) ||(c >= 0xa1 && c <= 0xdf)))
471 /* FIXME: I can't get this function to return anything
472 * different to what I pass it...
475 return c; /* ASCII CP or no MB char */
478 /*********************************************************************
479 * _ismbbkana(MSVCRT.@)
/* Single-byte katakana test. When MSVCRT_current_lc_all_cp is 932 the
 * result is whether c lies in 0xa1..0xdf.
 * NOTE(review): the result for any other code page is not visible in
 * these lines. */
481 int _ismbbkana(unsigned int c)
483 /* FIXME: use lc_ctype when supported, not lc_all */
484 if(MSVCRT_current_lc_all_cp == 932)
486 /* Japanese/Katakana, CP 932 */
487 return (c >= 0xa1 && c <= 0xdf);
492 /*********************************************************************
493 * _ismbcdigit(MSVCRT.@)
/* Digit test for the character value ch; at least one path only logs
 * a FIXME because multibyte characters are not handled.
 * NOTE(review): the conditions and return values are not visible in
 * these lines. */
495 int _ismbcdigit(unsigned int ch)
501 FIXME("Handle MBC chars\n");
506 /*********************************************************************
507 * _ismbcspace (MSVCRT.@)
/* Presumably a whitespace test for the character value c, going by the
 * name. NOTE(review): no part of the body is visible here - verify
 * against the full source. */
509 int _ismbcspace(unsigned int c)
518 /*********************************************************************
519 * _ismbchira(MSVCRT.@)
/* Hiragana test. When MSVCRT_current_lc_all_cp is 932 the result is
 * whether c lies in 0x829f..0x82f1.
 * NOTE(review): the result for any other code page is not visible in
 * these lines. */
521 int _ismbchira(unsigned int c)
523 /* FIXME: use lc_ctype when supported, not lc_all */
524 if(MSVCRT_current_lc_all_cp == 932)
526 /* Japanese/Hiragana, CP 932 */
527 return (c >= 0x829f && c <= 0x82f1);
532 /*********************************************************************
533 * _ismbckata(MSVCRT.@)
/* Katakana test. When MSVCRT_current_lc_all_cp is 932, one path
 * defers to _ismbbkana(c) and the other checks that c lies in
 * 0x8340..0x8396 and is not 0x837f.
 * NOTE(review): the condition that selects the _ismbbkana() path and
 * the result for other code pages are not visible in these lines. */
535 int _ismbckata(unsigned int c)
537 /* FIXME: use lc_ctype when supported, not lc_all */
538 if(MSVCRT_current_lc_all_cp == 932)
541 return _ismbbkana(c);
542 /* Japanese/Katakana, CP 932 */
543 return (c >= 0x8340 && c <= 0x8396 && c != 0x837f);
548 /*********************************************************************
549 * _ismbblead(MSVCRT.@)
/* Non-zero when the code page is multibyte (MSVCRT___mb_cur_max > 1)
 * and MSVCRT_isleadbyte(c) holds. As the FIXME says, MSVCRT_mbctype is
 * not consulted yet. */
551 int _ismbblead(unsigned int c)
553 /* FIXME: should reference MSVCRT_mbctype */
554 return MSVCRT___mb_cur_max > 1 && MSVCRT_isleadbyte(c);
558 /*********************************************************************
559 * _ismbbtrail(MSVCRT.@)
/* Returns the logical negation of _ismbblead(c).
 * NOTE(review): as written, every value that is not a lead byte is
 * reported as a trail byte, including all values on a single-byte code
 * page where _ismbblead() is always 0. The FIXME already points at
 * MSVCRT_mbctype as the intended source of truth. */
561 int _ismbbtrail(unsigned int c)
563 /* FIXME: should reference MSVCRT_mbctype */
564 return !_ismbblead(c);
567 /*********************************************************************
568 * _ismbslead(MSVCRT.@)
/* Context-sensitive lead-byte test: decides whether the byte at str,
 * inside the string beginning at start, is a lead byte. On a multibyte
 * code page start is advanced through the string and, on one visible
 * path, the answer is MSVCRT_isleadbyte(*str); the final return is 0.
 * NOTE(review): the advance of start tests *str rather than the byte
 * start currently points at, so the step size never depends on the
 * data being walked - this looks like it should be *start; confirm.
 * The loop header and the condition before the lead-byte return are
 * not visible in these lines. */
570 int _ismbslead(const unsigned char* start, const unsigned char* str)
572 /* Lead bytes can also be trail bytes if caller messed up
573 * iterating through the string...
575 if(MSVCRT___mb_cur_max > 1)
578 start += MSVCRT_isleadbyte(*str) ? 2 : 1;
581 return MSVCRT_isleadbyte(*str);
583 return 0; /* Must have been a trail, we skipped it */
586 /*********************************************************************
587 * _ismbstrail(MSVCRT.@)
/* Context-sensitive trail-byte test: true when _ismbslead() rejects the
 * byte at str and the byte just before it is a lead byte.
 * NOTE(review): str[-1] is read whenever _ismbslead() returns 0, also
 * when str == start, which reads one byte before the string - confirm
 * whether a str > start guard is needed. */
589 int _ismbstrail(const unsigned char* start, const unsigned char* str)
591 /* Must not be a lead, and must be preceded by one */
592 return !_ismbslead(start, str) && MSVCRT_isleadbyte(str[-1]);
595 /*********************************************************************
/* Fills str with the character c. On a single-byte code page, or when
 * c < 256, the work is delegated to _strset(). Otherwise c is masked
 * to 16 bits and the string is processed while two bytes remain; a
 * leftover single byte is overwritten with the terminator, which
 * shortens the string (see the FIXME).
 * NOTE(review): the loop body, the condition before the terminator
 * store and the final return are not visible in these lines. */
598 unsigned char* _mbsset(unsigned char* str, unsigned int c)
600 unsigned char* ret = str;
602 if(MSVCRT___mb_cur_max == 1 || c < 256)
603 return _strset(str, c); /* ASCII CP or SB char */
605 c &= 0xffff; /* Strip high bits */
607 while(str[0] && str[1])
613 str[0] = '\0'; /* FIXME: OK to shorten? */
618 /*********************************************************************
/* Fills str with the character c, with len as the limit. On a
 * single-byte code page, or when c < 256, the work is delegated to
 * _strnset(). Otherwise c is masked to 16 bits and the string is
 * processed while two bytes remain and len is not exhausted; a
 * terminator may then be stored, which shortens the string (see the
 * FIXME).
 * NOTE(review): the loop body, the condition before the terminator
 * store, any early exit and the final return are not visible in these
 * lines. */
621 unsigned char* _mbsnset(unsigned char* str, unsigned int c, MSVCRT_size_t len)
623 unsigned char *ret = str;
628 if(MSVCRT___mb_cur_max == 1 || c < 256)
629 return _strnset(str, c, len); /* ASCII CP or SB char */
631 c &= 0xffff; /* Strip high bits */
633 while(str[0] && str[1] && len--)
639 str[0] = '\0'; /* FIXME: OK to shorten? */
644 /*********************************************************************
645 * _mbsnccnt(MSVCRT.@)
646 * 'c' is for 'character'.
/* Character count of str bounded by len. The multibyte branch walks
 * the string while data remains and len is not exhausted, treating
 * lead bytes specially; the single-byte path returns min(ret, len).
 * NOTE(review): the declaration and assignment of ret, the lead-byte
 * handling and the return of the multibyte branch are not visible in
 * these lines. */
648 MSVCRT_size_t _mbsnccnt(const unsigned char* str, MSVCRT_size_t len)
651 if(MSVCRT___mb_cur_max > 1)
654 while(*str && len-- > 0)
656 if(MSVCRT_isleadbyte(*str))
669 return min(ret, len); /* ASCII CP */
672 /*********************************************************************
673 * _mbsnbcnt(MSVCRT.@)
674 * 'b' is for byte count.
/* Byte count of str bounded by len. The multibyte branch walks a
 * private cursor xstr while data remains and len is not exhausted,
 * testing each byte for being a lead byte as it advances; the
 * single-byte path returns min(ret, len).
 * NOTE(review): the declaration and assignment of ret, the lead-byte
 * handling and the return of the multibyte branch are not visible in
 * these lines. */
676 MSVCRT_size_t _mbsnbcnt(const unsigned char* str, MSVCRT_size_t len)
679 if(MSVCRT___mb_cur_max > 1)
681 const unsigned char* xstr = str;
682 while(*xstr && len-- > 0)
684 if (MSVCRT_isleadbyte(*xstr++))
690 return min(ret, len); /* ASCII CP */
694 /*********************************************************************
/* Appends src to dst with len as the limit. On a single-byte code
 * page this is strncat(dst, src, len). The multibyte branch first
 * walks dst with lead-byte awareness and then consumes src while data
 * remains and len is not exhausted, again checking each byte for being
 * a lead byte.
 * NOTE(review): the loop over dst, the copy statements, the
 * termination and the return of the multibyte branch are not visible
 * in these lines. */
697 unsigned char* _mbsncat(unsigned char* dst, const unsigned char* src, MSVCRT_size_t len)
699 if(MSVCRT___mb_cur_max > 1)
704 if (MSVCRT_isleadbyte(*dst++))
707 while (*src && len--)
710 if(MSVCRT_isleadbyte(*src++))
716 return strncat(dst, src, len); /* ASCII CP */
720 /*********************************************************************
/* Lower-cases the string s. On a multibyte code page each character is
 * fetched with _mbsnextc() and converted with _mbctolower(); the inner
 * note records the assumption that conversion keeps the byte width.
 * NOTE(review): the write-back, the loop, the single-byte path and the
 * return are not visible in these lines. */
723 unsigned char* _mbslwr(unsigned char* s)
727 if (MSVCRT___mb_cur_max > 1)
733 c = _mbctolower(_mbsnextc(s));
734 /* Note that I assume that the size of the character is unchanged */
748 /*********************************************************************
/* Upper-cases the string s. On a multibyte code page each character is
 * fetched with _mbsnextc() and converted with _mbctoupper(); the inner
 * note records the assumption that conversion keeps the byte width.
 * NOTE(review): the write-back, the loop, the single-byte path and the
 * return are not visible in these lines. */
751 unsigned char* _mbsupr(unsigned char* s)
755 if (MSVCRT___mb_cur_max > 1)
761 c = _mbctoupper(_mbsnextc(s));
762 /* Note that I assume that the size of the character is unchanged */
776 /*********************************************************************
/* Scans string against the characters in set. For each position p, a
 * lead byte is matched against set two bytes at a time (both *p and
 * p[1] must equal the set entry); other bytes use a separate loop over
 * set.
 * NOTE(review): no MSVCRT___mb_cur_max check is visible here, unlike
 * most neighbours. The loop exits, the extra advance past a trail byte
 * and the return expression are not visible in these lines. */
779 MSVCRT_size_t _mbsspn(const unsigned char* string, const unsigned char* set)
781 const unsigned char *p, *q;
783 for (p = string; *p; p++)
785 if (MSVCRT_isleadbyte(*p))
787 for (q = set; *q; q++)
791 if ((*p == *q) && (p[1] == q[1]))
799 for (q = set; *q; q++)
806 /*********************************************************************
/* Returns strcspn(str, cmp). On a multibyte code page a FIXME is
 * logged first, because two-byte characters are not handled and the
 * byte-wise result is returned regardless. */
809 MSVCRT_size_t _mbscspn(const unsigned char* str, const unsigned char* cmp)
811 if (MSVCRT___mb_cur_max > 1)
812 FIXME("don't handle double character case\n");
813 return strcspn(str, cmp);
816 /*********************************************************************
/* Reverses the characters of str. The character count comes from
 * _mbslen(); a scratch buffer of two bytes per character is obtained
 * from MSVCRT_malloc(), the string is unpacked into it, and the
 * two-byte slots are then read back from last to first, with lead
 * bytes marking two-byte characters.
 * NOTE(review): no check of the MSVCRT_malloc() result is visible, and
 * the MSVCRT_size_t from _mbslen() is narrowed to int. The unpack and
 * repack statements, the release of temp and the return are not
 * visible in these lines. */
819 unsigned char* _mbsrev(unsigned char* str)
821 int i, len = _mbslen(str);
822 unsigned char *p, *temp=MSVCRT_malloc(len*2);
827 /* unpack multibyte string to temp buffer */
831 if (MSVCRT_isleadbyte(*p))
843 /* repack it in the reverse order */
845 for(i=len-1; i>=0; i--)
847 if(MSVCRT_isleadbyte(temp[i*2]))
863 /*********************************************************************
864 * _mbspbrk (MSVCRT.@)
866 unsigned char* _mbspbrk(const unsigned char* str, const unsigned char* accept)
868 const unsigned char* p;
872 for(p = accept; *p; p += (MSVCRT_isleadbyte(*p)?2:1) )
875 if( !MSVCRT_isleadbyte(*p) || ( *(p+1) == *(str+1) ) )
876 return (unsigned char*)str;
878 str += (MSVCRT_isleadbyte(*str)?2:1);