From c8430da710491d409d4db78797e7f2ea6a4d557c Mon Sep 17 00:00:00 2001 From: HEYAHONG <2229388563@qq.com> Date: Wed, 21 Aug 2024 17:25:39 +0800 Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0hunicode=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- hbox/hbox.h | 1 + hbox/hunicode.c | 212 ++++++++++++++++++++++++++++++++++++++++++++++++ hbox/hunicode.h | 119 +++++++++++++++++++++++++++ 3 files changed, 332 insertions(+) create mode 100644 hbox/hunicode.c create mode 100644 hbox/hunicode.h diff --git a/hbox/hbox.h b/hbox/hbox.h index abd7873..e8e6a30 100644 --- a/hbox/hbox.h +++ b/hbox/hbox.h @@ -21,6 +21,7 @@ #include "hmemoryheap.h" #include "hobject.h" #include "hringbuf.h" +#include "hunicode.h" /* 由C++编写的组件 diff --git a/hbox/hunicode.c b/hbox/hunicode.c new file mode 100644 index 0000000..85acaab --- /dev/null +++ b/hbox/hunicode.c @@ -0,0 +1,212 @@ +/*************************************************************** + * Name: hunicode.c + * Purpose: Implements the hunicode interface + * Author: HYH (hyhsystem.cn) + * Created: 2024-08-21 + * Copyright: HYH (hyhsystem.cn) + * License: MIT + **************************************************************/ +#include "hunicode.h" + +/** \brief Find the first zero bit of a byte, scanning from the most significant bit down + * + * \param c char Byte to inspect + * \return size_t Index of the first zero bit, counted from 0 at the top bit; also 0 when no bit is zero (0xFF) + * + */ +static size_t char_zero_bit(char c) +{ + uint8_t u_c=(uint8_t)c; + for(size_t i=0; i<8; i++) + { + if((u_c&(0x80>>i))==0) + { + return i; + } + } + //Reached only for 0xFF, where all eight tests fail; the 0 returned here equals the result for an ASCII byte + return 0; +} +//Byte length of the UTF-8 sequence announced by the lead byte utf8str[0]; 0 means error (NULL pointer or a 10xxxxxx continuation byte). NOTE(review): lead bytes 0xF8..0xFE give 5..7 and 0xFF gives 1 - confirm these are meant to be accepted +static size_t utf8_char_length(const char *utf8str) +{ + size_t len=0; + if(utf8str!=NULL) + { + uint8_t zero_bit=char_zero_bit(utf8str[0]); + if(zero_bit==0) + { + //Single-byte character + return 1; + } + if(zero_bit>1) + { + return zero_bit; + } + } + return len; +} + +bool hunicode_cchar_string_is_utf8(const char *str) +{ + bool ret=(str!=NULL); + if(str!=NULL) + { + size_t index=0; + while(str[index]!='\0') + { + uint8_t utf8_char_len=utf8_char_length(&str[index]); +
if(utf8_char_len==0) + { + ret=false; + break; + } + if(utf8_char_len==1) + { + index+=utf8_char_len; + continue; + } + if(utf8_char_len>1) + { + const char *utf8_char=&str[index]; + for(size_t i=1; i= 0x80) + { + ret=false; + break; + } + index++; + } + } + return ret; +} + +size_t hunicode_cchar_string_length(const char *str) +{ + size_t length=0; + if(str!=NULL) + { + while(str[length]!='\0') + { + length++; + } + } + return length; +} + +size_t hunicode_wchar_t_string_length(const wchar_t *str) +{ + size_t length=0; + if(str!=NULL) + { + while(str[length]!='\0') + { + length++; + } + } + return length; +} + +size_t hunicode_char_string_length(const hunicode_char_t *str) +{ + size_t length=0; + if(str!=NULL) + { + while(str[length]!=0) + { + length++; + } + } + return length; +} + +void hunicode_char_from_wchar(hunicode_char_t *dest,size_t dest_length,const wchar_t *src,size_t src_length) +{ + if(dest==NULL || dest_length==0||src==0||src_length==0) + { + return; + } + for(size_t i=0; i USHRT_MAX +#ifndef HUNICODE_USE_WCHAR_T +#define HUNICODE_USE_WCHAR_T 1 +#endif // HUNICODE_USE_WCHAR_T +#endif + +#ifdef HUNICODE_USE_WCHAR_T +#define hunicode_char_t wchar_t +#else +#define hunicode_char_t int +#endif + +/** \brief Check whether a string is an ASCII string + * + * \param str const char* Pointer to the string + * \return bool Whether the string is ASCII + * + */ +bool hunicode_cchar_string_is_ascii(const char *str); + +/** \brief Check whether a string is a UTF-8 string + * + * \param str const char* Pointer to the string + * \return bool Whether the string is UTF-8 + * + */ +bool hunicode_cchar_string_is_utf8(const char *str); + +/** \brief Get the length of a char string + * + * \param str const char* Pointer to the string + * \return size_t Number of chars before the terminating NUL; 0 when str is NULL + * + */ +size_t hunicode_cchar_string_length(const char *str); + +/** \brief Get the length of a wchar_t string + * + * \param str const wchar_t* Pointer to the string + * \return size_t Number of wchar_t elements before the terminating zero; 0 when str is NULL + * + */ +size_t hunicode_wchar_t_string_length(const wchar_t *str); + +/** \brief Get the length of a hunicode_char_t string + * + * \param str const hunicode_char_t* Pointer to the string + * \return size_t Number of hunicode_char_t elements before the terminating zero; 0 when str is NULL + * + */ +size_t
hunicode_char_string_length(const hunicode_char_t *str); + +/** \brief Convert from wchar_t to hunicode_char_t; returns without doing anything when dest or src is NULL or either length is 0 + * + * \param dest hunicode_char_t* Pointer to the hunicode_char_t destination + * \param dest_length size_t Length of the buffer that dest points to + * \param src const wchar_t* Pointer to the wchar_t source + * \param src_length size_t Length of the buffer that src points to + * + */ +void hunicode_char_from_wchar(hunicode_char_t *dest,size_t dest_length,const wchar_t *src,size_t src_length); + +/** \brief Convert from a wchar_t string to hunicode_char_t + * + * \param dest hunicode_char_t* Pointer to the hunicode_char_t destination + * \param dest_length size_t Length of the buffer that dest points to + * \param src const wchar_t* Pointer to the wchar_t string + * + */ +void hunicode_char_from_wchar_string(hunicode_char_t *dest,size_t dest_length,const wchar_t *src); + +/** \brief Convert from hunicode_char_t to wchar_t + * + * \param dest wchar_t* Pointer to the wchar_t destination + * \param dest_length size_t Length of the buffer that dest points to + * \param src const hunicode_char_t* Pointer to the hunicode_char_t source + * \param src_length size_t Length of the buffer that src points to + * + */ +void hunicode_char_to_wchar(wchar_t *dest,size_t dest_length,const hunicode_char_t *src,size_t src_length); + +/** \brief Convert from a hunicode_char_t string to wchar_t + * + * \param dest wchar_t* Pointer to the wchar_t destination + * \param dest_length size_t Length of the buffer that dest points to + * \param src const hunicode_char_t* Pointer to the hunicode_char_t string + * + */ +void hunicode_char_string_to_wchar(wchar_t *dest,size_t dest_length,const hunicode_char_t *src); + +#ifdef __cplusplus +} +#endif // __cplusplus + + +#endif // __HUNICODE_H_INCLUDED__