2 * msvcrt.dll mbcs functions
4 * Copyright 1999 Alexandre Julliard
5 * Copyright 2000 Jon Griffths
8 * Not currently binary compatible with win32. MSVCRT_mbctype must be
9 * populated correctly and the ismb* functions should reference it.
14 #include "msvcrt/mbctype.h"
15 #include "msvcrt/mbstring.h"
16 #include "msvcrt/stdlib.h"
17 #include "msvcrt/string.h"
18 #include "msvcrt/wctype.h"
21 DEFAULT_DEBUG_CHANNEL(msvcrt);
23 unsigned char MSVCRT_mbctype[257];
24 int MSVCRT___mb_cur_max = 1;
26 /*********************************************************************
27 * __p__mbctype (MSVCRT.@)
29 unsigned char* __p__mbctype(void)
31 return MSVCRT_mbctype;
34 /*********************************************************************
35 * __p___mb_cur_max(MSVCRT.@)
37 int* __p___mb_cur_max(void)
39 return &MSVCRT___mb_cur_max;
42 /*********************************************************************
45 unsigned int _mbsnextc(const unsigned char* str)
47 if(MSVCRT___mb_cur_max > 1 && MSVCRT_isleadbyte(*str))
48 return *str << 8 | str[1];
49 return *str; /* ASCII CP or SB char */
52 /*********************************************************************
53 * _mbctolower(MSVCRT.@)
 *
 * Convert a character to lower case.
 *
 * A value for which MSVCRT_isleadbyte() is true is not converted; a
 * FIXME is logged instead. Any other value is passed through tolower().
 * NOTE(review): the value returned on the lead-byte path is not visible
 * in this excerpt - presumably c unchanged; confirm.
55 unsigned int _mbctolower(unsigned int c)
57 if (MSVCRT_isleadbyte(c))
59 FIXME("Handle MBC chars\n");
62 return tolower(c); /* ASCII CP or SB char */
65 /*********************************************************************
66 * _mbctoupper(MSVCRT.@)
 *
 * Convert a character to upper case.
 *
 * A value for which MSVCRT_isleadbyte() is true is not converted; a
 * FIXME is logged instead. Any other value is passed through toupper().
 * NOTE(review): the value returned on the lead-byte path is not visible
 * in this excerpt - presumably c unchanged; confirm.
68 unsigned int _mbctoupper(unsigned int c)
70 if (MSVCRT_isleadbyte(c))
72 FIXME("Handle MBC chars\n");
75 return toupper(c); /* ASCII CP or SB char */
78 /*********************************************************************
81 unsigned char* _mbsdec(const unsigned char* start, const unsigned char* cur)
83 if(MSVCRT___mb_cur_max > 1)
84 return (char *)(_ismbstrail(start,cur-1) ? cur - 2 : cur -1);
86 return (char *)cur - 1; /* ASCII CP or SB char */
89 /*********************************************************************
92 unsigned char* _mbsinc(const unsigned char* str)
94 if(MSVCRT___mb_cur_max > 1 && MSVCRT_isleadbyte(*str))
95 return (unsigned char*)str + 2; /* MB char */
97 return (unsigned char*)str + 1; /* ASCII CP or SB char */
100 /*********************************************************************
 * Step forward over num characters of a multibyte string.
 *
 * When MSVCRT___mb_cur_max is not greater than 1 the result is simply
 * str + num.
 * NOTE(review): the multibyte stepping loop is not visible in this
 * excerpt - presumably str is advanced one character at a time before
 * being returned; confirm.
103 unsigned char* _mbsninc(const unsigned char* str, MSVCRT_size_t num)
107 if(MSVCRT___mb_cur_max > 1)
111 return (unsigned char*)str;
113 return (unsigned char*)str + num; /* ASCII CP */
/*********************************************************************
 *		_mbclen(MSVCRT.@)
 *
 * Get the width in bytes of the character at the start of a string.
 *
 * RETURNS
 *  2 if *str is a lead byte, 1 otherwise.
 */
unsigned int _mbclen(const unsigned char* str)
{
  if(MSVCRT_isleadbyte(*str))
    return 2;

  return 1;
}
124 /*********************************************************************
 * Get the width in bytes of the first character of str, examining at
 * most size bytes.
 *
 * For a non-NULL, non-empty str and a non-zero size the result is:
 *  1  when MSVCRT___mb_cur_max is 1, or *str is not a lead byte;
 *  2  when *str is a lead byte and size > 1;
 *  -1 when *str is a lead byte but only one byte may be examined.
 * NOTE(review): the result for a NULL/empty str or zero size is not
 * visible in this excerpt - presumably 0; confirm.
127 int MSVCRT_mblen(const char* str, MSVCRT_size_t size)
129 if (str && *str && size)
131 if(MSVCRT___mb_cur_max == 1)
132 return 1; /* ASCII CP */
134 return !MSVCRT_isleadbyte(*str) ? 1 : (size>1 ? 2 : -1);
139 /*********************************************************************
 * Get the length of a multibyte string.
 *
 * When MSVCRT___mb_cur_max is not greater than 1 this is strlen(str).
 * Otherwise the string is walked with a step of 2 bytes at a lead byte
 * and 1 byte elsewhere, using a local counter len.
 * NOTE(review): the loop header, the counter update and the return of
 * the multibyte path are not visible in this excerpt - presumably one
 * is counted per step; confirm.
142 MSVCRT_size_t _mbslen(const unsigned char* str)
144 if(MSVCRT___mb_cur_max > 1)
146 MSVCRT_size_t len = 0;
149 str += MSVCRT_isleadbyte(*str) ? 2 : 1;
154 return strlen(str); /* ASCII CP */
157 /*********************************************************************
158 * _mbstrlen(MSVCRT.@)
 *
 * Get the length of a multibyte string passed as plain char.
 *
 * When MSVCRT___mb_cur_max is not greater than 1 this is strlen(str).
 * Otherwise the string is walked with a step of 2 bytes at a lead byte
 * and 1 byte elsewhere; no validity check of the characters is done
 * (see the FIXME below).
 * NOTE(review): the loop header, the counter update and the return of
 * the multibyte path are not visible in this excerpt; confirm.
 * NOTE(review): *str is a plain char here, so bytes >= 0x80 may reach
 * MSVCRT_isleadbyte() as negative values where char is signed - verify.
160 MSVCRT_size_t _mbstrlen(const char* str)
162 if(MSVCRT___mb_cur_max > 1)
164 MSVCRT_size_t len = 0;
167 /* FIXME: According to the documentation we are supposed to test for
168 * multi-byte character validity. Whatever that means
170 str += MSVCRT_isleadbyte(*str) ? 2 : 1;
175 return strlen(str); /* ASCII CP */
178 /*********************************************************************
 * Copy one multibyte character from src to dest.
 *
 * When MSVCRT___mb_cur_max > 1 and *src is a lead byte, src is
 * pre-incremented and the following (trail) byte is stored through
 * dest. An ERR is also present in the function.
 * NOTE(review): the statements around the visible ones (copy of the
 * first byte, and the condition under which the ERR fires) are not
 * visible in this excerpt; confirm before relying on this description.
181 void _mbccpy(unsigned char* dest, const unsigned char* src)
184 if(MSVCRT___mb_cur_max > 1 && MSVCRT_isleadbyte(*src))
185 *dest = *++src; /* MB char */
187 ERR("failure.. is this ok?\n");
190 /*********************************************************************
 * Copy at most n characters of src to dst.
 *
 * When MSVCRT___mb_cur_max is not greater than 1 this is
 * strncpy(dst, src, n). The multibyte path keeps the original dst in
 * ret and inspects each source byte with MSVCRT_isleadbyte() while
 * advancing src.
 * NOTE(review): the copy loop, padding and return of the multibyte
 * path are not visible in this excerpt - presumably a lead byte causes
 * its trail byte to be copied too, and ret is returned; confirm.
193 unsigned char* _mbsncpy(unsigned char* dst, const unsigned char* src, MSVCRT_size_t n)
197 if(MSVCRT___mb_cur_max > 1)
199 unsigned char* ret = dst;
203 if (MSVCRT_isleadbyte(*src++))
210 return strncpy(dst, src, n); /* ASCII CP */
213 /*********************************************************************
214 * _mbsnbcpy(MSVCRT.@)
 *
 * Copy src to dst; the in-code comment indicates n is a byte budget
 * ("we have only one byte left").
 *
 * When MSVCRT___mb_cur_max is not greater than 1 this is
 * strncpy(dst, src, n). The multibyte path loops while src is not
 * exhausted and more than one byte of budget remains, then handles
 * the final byte separately: it is only considered when it is not a
 * lead byte, because a two-byte character no longer fits.
 * NOTE(review): the loop bodies, padding and return of the multibyte
 * path are not visible in this excerpt - presumably ret is returned;
 * confirm.
216 unsigned char* _mbsnbcpy(unsigned char* dst, const unsigned char* src, MSVCRT_size_t n)
220 if(MSVCRT___mb_cur_max > 1)
222 unsigned char* ret = dst;
223 while (*src && (n-- > 1))
226 if (MSVCRT_isleadbyte(*src++))
232 if (*src && n && !MSVCRT_isleadbyte(*src))
234 /* If the last character is a multi-byte character then
235 * we cannot copy it since we have only one byte left
244 return strncpy(dst, src, n); /* ASCII CP */
247 /*********************************************************************
 * Compare two multibyte strings.
 *
 * When MSVCRT___mb_cur_max is not greater than 1 this is
 * strcmp(str, cmp). Otherwise the strings are compared one character
 * at a time: each character is fetched with _mbsnextc(), -1 or 1 is
 * returned according to strc < cmpc, and both pointers advance by 2
 * when the character value exceeds 255, else by 1. The step for cmp is
 * derived from strc, which is valid because the characters are equal
 * at that point.
 * NOTE(review): the loop header and the conditions guarding the
 * visible returns are not visible in this excerpt - presumably
 * "*cmp ? -1 : 0" is the result once str is exhausted; confirm.
250 int _mbscmp(const unsigned char* str, const unsigned char* cmp)
252 if(MSVCRT___mb_cur_max > 1)
254 unsigned int strc, cmpc;
257 return *cmp ? -1 : 0;
260 strc = _mbsnextc(str);
261 cmpc = _mbsnextc(cmp);
263 return strc < cmpc ? -1 : 1;
264 str +=(strc > 255) ? 2 : 1;
265 cmp +=(strc > 255) ? 2 : 1; /* equal, use same increment */
268 return strcmp(str, cmp); /* ASCII CP */
271 /*********************************************************************
 * Compare two multibyte strings, ignoring case.
 *
 * When MSVCRT___mb_cur_max is not greater than 1 this is
 * strcasecmp(str, cmp). Otherwise each character is fetched with
 * _mbsnextc() and folded with _mbctolower() before comparison; -1 or 1
 * is returned according to strc < cmpc, and both pointers advance by 2
 * when the folded value exceeds 255, else by 1.
 * NOTE(review): the loop header and the conditions guarding the
 * visible returns are not visible in this excerpt - presumably
 * "*cmp ? -1 : 0" is the result once str is exhausted; confirm.
274 int _mbsicmp(const unsigned char* str, const unsigned char* cmp)
276 if(MSVCRT___mb_cur_max > 1)
278 unsigned int strc, cmpc;
281 return *cmp ? -1 : 0;
284 strc = _mbctolower(_mbsnextc(str));
285 cmpc = _mbctolower(_mbsnextc(cmp));
287 return strc < cmpc ? -1 : 1;
288 str +=(strc > 255) ? 2 : 1;
289 cmp +=(strc > 255) ? 2 : 1; /* equal, use same increment */
292 return strcasecmp(str, cmp); /* ASCII CP */
295 /*********************************************************************
 * Compare up to len characters of two multibyte strings.
 *
 * When MSVCRT___mb_cur_max is not greater than 1 this is
 * strncmp(str, cmp, len). Otherwise characters are fetched with
 * _mbsnextc(); -1 or 1 is returned according to strc < cmpc, and 0 is
 * returned once len characters have matched. The per-character step
 * inc is 2 when the character value exceeds 255, else 1.
 * NOTE(review): the loop header, the use of inc and the conditions
 * guarding the visible returns are not visible in this excerpt;
 * confirm.
298 int _mbsncmp(const unsigned char* str, const unsigned char* cmp, MSVCRT_size_t len)
303 if(MSVCRT___mb_cur_max > 1)
305 unsigned int strc, cmpc;
310 return *cmp ? -1 : 0;
313 strc = _mbsnextc(str);
314 cmpc = _mbsnextc(cmp);
316 return strc < cmpc ? -1 : 1;
317 inc=(strc > 255) ? 2 : 1; /* Equal, use same increment */
321 return 0; /* Matched len chars */
323 return strncmp(str, cmp, len); /* ASCII CP */
326 /*********************************************************************
327 * _mbsnbcmp(MSVCRT.@)
 *
 * Compare two multibyte strings with a limit of len.
 *
 * On the multibyte path a character that starts with a lead byte is
 * only fetched with _mbsnextc() when at least 2 of len remain;
 * otherwise it is treated as 0. -1 or 1 is returned according to
 * strc < cmpc, 0 once the limit is reached. Outside that path the
 * result is strncmp(str, cmp, len).
 * NOTE(review): the loop header, the non-lead-byte branches and the
 * updates of len are not visible in this excerpt; confirm.
 * NOTE(review): the FIXME below prints len, an MSVCRT_size_t, with
 * "%d" - looks like a format/argument mismatch; verify.
329 int _mbsnbcmp(const unsigned char* str, const unsigned char* cmp, MSVCRT_size_t len)
333 if(MSVCRT___mb_cur_max > 1)
335 unsigned int strc, cmpc;
340 return *cmp ? -1 : 0;
343 if (MSVCRT_isleadbyte(*str))
345 strc=(len>=2)?_mbsnextc(str):0;
353 if (MSVCRT_isleadbyte(*cmp))
354 cmpc=(len>=2)?_mbsnextc(cmp):0;
358 return strc < cmpc ? -1 : 1;
363 return 0; /* Matched len chars */
364 FIXME("%s %s %d\n",str,cmp,len);
366 return strncmp(str,cmp,len);
369 /*********************************************************************
370 * _mbsnicmp(MSVCRT.@)
372 * Compare two multibyte strings case insensitively to 'len' characters.
 *
 * When MSVCRT___mb_cur_max is not greater than 1 this is
 * strncasecmp(str, cmp, len). Otherwise each character is fetched with
 * _mbsnextc() and folded with _mbctolower(); -1 or 1 is returned
 * according to strc < cmpc, 0 once len characters have matched. Both
 * pointers advance by 2 when the folded value exceeds 255, else by 1.
 * NOTE(review): the loop header and the conditions guarding the
 * visible returns are not visible in this excerpt; confirm.
374 int _mbsnicmp(const unsigned char* str, const unsigned char* cmp, MSVCRT_size_t len)
376 /* FIXME: No tolower() for mb strings yet */
377 if(MSVCRT___mb_cur_max > 1)
379 unsigned int strc, cmpc;
383 return *cmp ? -1 : 0;
386 strc = _mbctolower(_mbsnextc(str));
387 cmpc = _mbctolower(_mbsnextc(cmp));
389 return strc < cmpc ? -1 : 1;
390 str +=(strc > 255) ? 2 : 1;
391 cmp +=(strc > 255) ? 2 : 1; /* Equal, use same increment */
393 return 0; /* Matched len chars */
395 return strncasecmp(str, cmp, len); /* ASCII CP */
398 /*********************************************************************
401 * Find a multibyte character in a multibyte string.
 *
 * When MSVCRT___mb_cur_max is not greater than 1 this is strchr(s, x).
 * Otherwise s is scanned character by character, stepping 2 bytes
 * when the current character value c exceeds 255 and 1 byte otherwise;
 * a pointer into s is returned on a match.
 * NOTE(review): how c is obtained, the match test and the not-found
 * result are not visible in this excerpt - presumably _mbsnextc(s),
 * c == x and NULL respectively; confirm.
403 unsigned char* _mbschr(const unsigned char* s, unsigned int x)
405 if(MSVCRT___mb_cur_max > 1)
412 return (unsigned char*)s;
415 s += c > 255 ? 2 : 1;
418 return strchr(s, x); /* ASCII CP */
421 /*********************************************************************
 * Find the last occurrence of a character in a multibyte string.
 *
 * On the multibyte path s is scanned character by character, stepping
 * 2 bytes when the current character value c exceeds 255 and 1 byte
 * otherwise; match starts as NULL and is updated to the current
 * position inside the loop.
 * NOTE(review): how c is obtained, the match test, the returns and the
 * single-byte path are not visible in this excerpt; confirm.
424 unsigned char* _mbsrchr(const unsigned char* s, unsigned int x)
426 if(MSVCRT___mb_cur_max > 1)
429 unsigned char* match=NULL;
435 match=(unsigned char*)s;
438 s +=(c > 255) ? 2 : 1;
444 /*********************************************************************
 * Convert the multibyte character at str into a wide character.
 *
 * The conversion is done by MultiByteToWideChar() with CP_ACP, reading
 * up to n bytes of str and writing at most one WCHAR to dst; a zero
 * result from that call is treated as a separate case. Per the in-code
 * comment the function returns the number of source bytes used; a
 * two-byte character is recognised when n >= 2, *str is a lead byte
 * and the following byte is not NUL.
 * NOTE(review): the actual return statements are not visible in this
 * excerpt; confirm the exact values.
447 int MSVCRT_mbtowc(WCHAR *dst, const char* str, MSVCRT_size_t n)
451 if(!MultiByteToWideChar(CP_ACP, 0, str, n, dst, 1))
453 /* return the number of bytes from src that have been used */
456 if(n >= 2 && MSVCRT_isleadbyte(*str) && str[1])
461 /*********************************************************************
462 * _mbbtombc(MSVCRT.@)
464 unsigned int _mbbtombc(unsigned int c)
466 if(MSVCRT___mb_cur_max > 1 &&
467 ((c >= 0x20 && c <=0x7e) ||(c >= 0xa1 && c <= 0xdf)))
469 /* FIXME: I can't get this function to return anything
470 * different to what I pass it...
473 return c; /* ASCII CP or no MB char */
476 /*********************************************************************
477 * _ismbbkana(MSVCRT.@)
 *
 * Test whether a byte is a single-byte Katakana character.
 *
 * When MSVCRT_current_lc_all_cp is 932 the result is non-zero exactly
 * for 0xa1 <= c <= 0xdf.
 * NOTE(review): the result for other code pages is not visible in
 * this excerpt - presumably 0; confirm.
479 int _ismbbkana(unsigned int c)
481 /* FIXME: use lc_ctype when supported, not lc_all */
482 if(MSVCRT_current_lc_all_cp == 932)
484 /* Japanese/Katakana, CP 932 */
485 return (c >= 0xa1 && c <= 0xdf);
490 /*********************************************************************
491 * _ismbcdigit(MSVCRT.@)
 *
 * Test whether a multibyte character is a digit.
 *
 * Multibyte values are not handled yet; a FIXME is logged for them.
 * NOTE(review): the conditions and return values are not visible in
 * this excerpt - presumably single-byte values use an isdigit-style
 * test; confirm.
493 int _ismbcdigit(unsigned int ch)
499 FIXME("Handle MBC chars\n");
504 /*********************************************************************
505 * _ismbcspace (MSVCRT.@)
 *
 * Test whether a multibyte character is white space.
 * NOTE(review): only the signature is visible in this excerpt; the
 * description above is inferred from the name - confirm against the
 * body.
507 int _ismbcspace(unsigned int c)
516 /*********************************************************************
517 * _ismbchira(MSVCRT.@)
 *
 * Test whether a multibyte character is Hiragana.
 *
 * When MSVCRT_current_lc_all_cp is 932 the result is non-zero exactly
 * for 0x829f <= c <= 0x82f1.
 * NOTE(review): the result for other code pages is not visible in
 * this excerpt - presumably 0; confirm.
519 int _ismbchira(unsigned int c)
521 /* FIXME: use lc_ctype when supported, not lc_all */
522 if(MSVCRT_current_lc_all_cp == 932)
524 /* Japanese/Hiragana, CP 932 */
525 return (c >= 0x829f && c <= 0x82f1);
530 /*********************************************************************
531 * _ismbckata(MSVCRT.@)
 *
 * Test whether a multibyte character is Katakana.
 *
 * When MSVCRT_current_lc_all_cp is 932 the function either defers to
 * _ismbbkana(c) or tests the double-byte range 0x8340..0x8396 with
 * 0x837f excluded.
 * NOTE(review): the condition choosing between those two tests is not
 * visible in this excerpt - presumably single-byte values (c < 256)
 * go to _ismbbkana(); confirm. The result for other code pages is not
 * visible either - presumably 0.
533 int _ismbckata(unsigned int c)
535 /* FIXME: use lc_ctype when supported, not lc_all */
536 if(MSVCRT_current_lc_all_cp == 932)
539 return _ismbbkana(c);
540 /* Japanese/Katakana, CP 932 */
541 return (c >= 0x8340 && c <= 0x8396 && c != 0x837f);
546 /*********************************************************************
547 * _ismbblead(MSVCRT.@)
549 int _ismbblead(unsigned int c)
551 /* FIXME: should reference MSVCRT_mbctype */
552 return MSVCRT___mb_cur_max > 1 && MSVCRT_isleadbyte(c);
/*********************************************************************
 *		_ismbbtrail(MSVCRT.@)
 *
 * Test whether a byte is a multibyte trail byte.
 *
 * RETURNS
 *  1 if _ismbblead(c) is zero, 0 otherwise.
 */
int _ismbbtrail(unsigned int c)
{
  /* FIXME: should reference MSVCRT_mbctype */
  if(_ismbblead(c))
    return 0;

  return 1;
}
565 /*********************************************************************
566 * _ismbslead(MSVCRT.@)
568 int _ismbslead(const unsigned char* start, const unsigned char* str)
570 /* Lead bytes can also be trail bytes if caller messed up
571 * iterating through the string...
573 if(MSVCRT___mb_cur_max > 1)
576 start += MSVCRT_isleadbyte(*str) ? 2 : 1;
579 return MSVCRT_isleadbyte(*str);
581 return 0; /* Must have been a trail, we skipped it */
/*********************************************************************
 *		_ismbstrail(MSVCRT.@)
 *
 * Test whether the byte at str is the trail byte of a character in the
 * multibyte string beginning at start.
 *
 * PARAMS
 *  start [I] First byte of the string
 *  str   [I] Position inside the string to classify
 *
 * RETURNS
 *  Non-zero if str is not a lead position and the byte before it is a
 *  lead byte, 0 otherwise.
 */
int _ismbstrail(const unsigned char* start, const unsigned char* str)
{
  /* The first byte of the string has no predecessor, so it cannot be a
   * trail byte; bail out before str[-1] reads in front of the buffer.
   */
  if(str <= start)
    return 0;

  /* Must not be a lead, and must be preceded by one */
  return !_ismbslead(start, str) && MSVCRT_isleadbyte(str[-1]);
}
593 /*********************************************************************
 * Fill a multibyte string with the character c.
 *
 * When MSVCRT___mb_cur_max is 1 or c is a single-byte value (< 256)
 * the work is delegated to _strset(str, c). Otherwise c is reduced to
 * its low 16 bits and the string is processed while two non-NUL bytes
 * remain; a NUL is then written at the current position, which can
 * shorten the string (see the FIXME).
 * NOTE(review): the loop body, the condition guarding the NUL store
 * and the final return are not visible in this excerpt - presumably
 * the two bytes of c are stored per step and ret is returned; confirm.
596 unsigned char* _mbsset(unsigned char* str, unsigned int c)
598 unsigned char* ret = str;
600 if(MSVCRT___mb_cur_max == 1 || c < 256)
601 return _strset(str, c); /* ASCII CP or SB char */
603 c &= 0xffff; /* Strip high bits */
605 while(str[0] && str[1])
611 str[0] = '\0'; /* FIXME: OK to shorten? */
616 /*********************************************************************
 * Fill at most len characters of a multibyte string with c.
 *
 * When MSVCRT___mb_cur_max is 1 or c is a single-byte value (< 256)
 * the work is delegated to _strnset(str, c, len). Otherwise c is
 * reduced to its low 16 bits and the string is processed while two
 * non-NUL bytes remain and len is not used up; a NUL is then written
 * at the current position, which can shorten the string (see the
 * FIXME).
 * NOTE(review): the loop body, the condition guarding the NUL store,
 * any early exit before the delegation and the final return are not
 * visible in this excerpt; confirm.
619 unsigned char* _mbsnset(unsigned char* str, unsigned int c, MSVCRT_size_t len)
621 unsigned char *ret = str;
626 if(MSVCRT___mb_cur_max == 1 || c < 256)
627 return _strnset(str, c, len); /* ASCII CP or SB char */
629 c &= 0xffff; /* Strip high bits */
631 while(str[0] && str[1] && len--)
637 str[0] = '\0'; /* FIXME: OK to shorten? */
642 /*********************************************************************
643 * _mbsnccnt(MSVCRT.@)
644 * 'c' is for 'character'.
 *
 * Count characters in a multibyte string, limited by len.
 *
 * On the multibyte path the string is scanned until its end or until
 * len is used up, with lead bytes treated specially. Outside that path
 * the result is min(ret, len).
 * NOTE(review): the declaration and computation of ret, and the
 * handling of a lead byte inside the loop, are not visible in this
 * excerpt - presumably ret is the string length on the single-byte
 * path; confirm.
646 MSVCRT_size_t _mbsnccnt(const unsigned char* str, MSVCRT_size_t len)
649 if(MSVCRT___mb_cur_max > 1)
652 while(*str && len-- > 0)
654 if(MSVCRT_isleadbyte(*str))
667 return min(ret, len); /* ASCII CP */
670 /*********************************************************************
671 * _mbsnbcnt(MSVCRT.@)
672 * 'b' is for byte count.
 *
 * Count bytes in a multibyte string, limited by len.
 *
 * On the multibyte path a cursor xstr walks the string until its end
 * or until len is used up, with lead bytes treated specially. Outside
 * that path the result is min(ret, len).
 * NOTE(review): the declaration and computation of ret, the handling
 * of a lead byte and the multibyte return value are not visible in
 * this excerpt - presumably the distance xstr - str is returned;
 * confirm.
674 MSVCRT_size_t _mbsnbcnt(const unsigned char* str, MSVCRT_size_t len)
677 if(MSVCRT___mb_cur_max > 1)
679 const unsigned char* xstr = str;
680 while(*xstr && len-- > 0)
682 if (MSVCRT_isleadbyte(*xstr++))
688 return min(ret, len); /* ASCII CP */
692 /*********************************************************************
 * Append at most len characters of src to dst.
 *
 * When MSVCRT___mb_cur_max is not greater than 1 this is
 * strncat(dst, src, len). On the multibyte path dst is first advanced
 * (checking each byte with MSVCRT_isleadbyte()), then src is consumed
 * while it is not exhausted and len is not used up, again checking
 * each byte for a lead byte.
 * NOTE(review): the loop bodies, the termination of dst and the return
 * value of the multibyte path are not visible in this excerpt;
 * confirm.
695 unsigned char* _mbsncat(unsigned char* dst, const unsigned char* src, MSVCRT_size_t len)
697 if(MSVCRT___mb_cur_max > 1)
702 if (MSVCRT_isleadbyte(*dst++))
705 while (*src && len--)
708 if(MSVCRT_isleadbyte(*src++))
714 return strncat(dst, src, len); /* ASCII CP */
718 /*********************************************************************
 * Convert a multibyte string to lower case.
 *
 * On the multibyte path each character is fetched with _mbsnextc()
 * and converted with _mbctolower(); the code assumes the converted
 * character occupies the same number of bytes as the original.
 * NOTE(review): the loop, the write-back of c, the single-byte path
 * and the return value are not visible in this excerpt; confirm.
721 unsigned char* _mbslwr(unsigned char* s)
725 if (MSVCRT___mb_cur_max > 1)
731 c = _mbctolower(_mbsnextc(s));
732 /* Note that I assume that the size of the character is unchanged */
746 /*********************************************************************
 * Convert a multibyte string to upper case.
 *
 * On the multibyte path each character is fetched with _mbsnextc()
 * and converted with _mbctoupper(); the code assumes the converted
 * character occupies the same number of bytes as the original.
 * NOTE(review): the loop, the write-back of c, the single-byte path
 * and the return value are not visible in this excerpt; confirm.
749 unsigned char* _mbsupr(unsigned char* s)
753 if (MSVCRT___mb_cur_max > 1)
759 c = _mbctoupper(_mbsnextc(s));
760 /* Note that I assume that the size of the character is unchanged */
774 /*********************************************************************
 * Scan string against the characters listed in set.
 *
 * string is walked with p; at a lead byte, set is searched for a
 * two-byte match (both *p == *q and p[1] == q[1]); otherwise set is
 * searched with a separate loop.
 * NOTE(review): the loop bodies, the stop condition and the return
 * value are not visible in this excerpt - presumably the length of
 * the leading run of string made only of characters from set is
 * returned; confirm.
777 MSVCRT_size_t _mbsspn(const unsigned char* string, const unsigned char* set)
779 const unsigned char *p, *q;
781 for (p = string; *p; p++)
783 if (MSVCRT_isleadbyte(*p))
785 for (q = set; *q; q++)
789 if ((*p == *q) && (p[1] == q[1]))
797 for (q = set; *q; q++)
804 /*********************************************************************
807 MSVCRT_size_t _mbscspn(const unsigned char* str, const unsigned char* cmp)
809 if (MSVCRT___mb_cur_max > 1)
810 FIXME("don't handle double character case\n");
811 return strcspn(str, cmp);
814 /*********************************************************************
 * Reverse a multibyte string character by character.
 *
 * The character count len comes from _mbslen(). A temporary buffer of
 * len*2 bytes is obtained from MSVCRT_malloc(); the string is first
 * unpacked into it (slot width is two bytes, as the repack loop reads
 * temp[i*2]) and then repacked from the last slot to the first.
 * NOTE(review): the unpack/repack bodies, the release of temp and the
 * return value are not visible in this excerpt; confirm.
 * NOTE(review): no check of the MSVCRT_malloc() result is visible
 * here - verify that allocation failure is handled.
817 unsigned char* _mbsrev(unsigned char* str)
819 int i, len = _mbslen(str);
820 unsigned char *p, *temp=MSVCRT_malloc(len*2);
825 /* unpack multibyte string to temp buffer */
829 if (MSVCRT_isleadbyte(*p))
841 /* repack it in the reverse order */
843 for(i=len-1; i>=0; i--)
845 if(MSVCRT_isleadbyte(temp[i*2]))
861 /*********************************************************************
862 * _mbspbrk (MSVCRT.@)
864 unsigned char* _mbspbrk(const unsigned char* str, const unsigned char* accept)
866 const unsigned char* p;
870 for(p = accept; *p; p += (MSVCRT_isleadbyte(*p)?2:1) )
873 if( !MSVCRT_isleadbyte(*p) || ( *(p+1) == *(str+1) ) )
874 return (unsigned char*)str;
876 str += (MSVCRT_isleadbyte(*str)?2:1);