-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
332 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,212 @@ | ||
/*************************************************************** | ||
* Name: hunicode.c | ||
* Purpose: 实现hunicode接口 | ||
* Author: HYH (hyhsystem.cn) | ||
* Created: 2024-08-21 | ||
* Copyright: HYH (hyhsystem.cn) | ||
* License: MIT | ||
**************************************************************/ | ||
#include "hunicode.h" | ||
|
||
/** \brief Find the position of the first zero bit in a byte, scanning from the most significant bit.
 *
 * The result equals the number of leading one bits of the byte:
 * 0 for a byte below 0x80, 1 for a byte of the form 10xxxxxx, 2 for 110xxxxx, and so on.
 *
 * \param c char Byte to inspect (its value is reinterpreted as unsigned).
 * \return size_t Index of the first zero bit (0 = most significant bit), or 8 when the byte is 0xFF and therefore has no zero bit.
 *
 */
static size_t char_zero_bit(char c)
{
    const uint8_t byte=(uint8_t)c;
    for(size_t i=0; i<8; i++)
    {
        if((byte&(0x80u>>i))==0)
        {
            return i;
        }
    }
    /* Only 0xFF reaches this point. Report 8 instead of 0 so that the byte is not mistaken for one whose top bit is clear. */
    return 8;
}
/** \brief Determine the encoded length of a UTF-8 character from its lead byte.
 *
 * Only the first byte is examined; continuation bytes are not validated here,
 * and overlong forms or out-of-range code points are not detected.
 * Sequences longer than 4 bytes are rejected (RFC 3629 limits UTF-8 to 4 bytes),
 * as are stray continuation bytes (10xxxxxx) and the bytes 0xF8..0xFF.
 *
 * \param utf8str const char* Pointer to the first byte of the character; may be NULL.
 * \return size_t 1..4 for a valid lead byte, 0 on error (NULL pointer or invalid lead byte).
 *
 */
static size_t utf8_char_length(const char *utf8str)
{
    if(utf8str==NULL)
    {
        return 0;
    }
    const uint8_t lead=(uint8_t)utf8str[0];
    if(lead<0x80u)
    {
        /* 0xxxxxxx: single-byte (ASCII) character */
        return 1;
    }
    if((lead&0xE0u)==0xC0u)
    {
        /* 110xxxxx */
        return 2;
    }
    if((lead&0xF0u)==0xE0u)
    {
        /* 1110xxxx */
        return 3;
    }
    if((lead&0xF8u)==0xF0u)
    {
        /* 11110xxx */
        return 4;
    }
    /* 10xxxxxx (continuation byte in lead position) or 11111xxx (never valid) */
    return 0;
}
|
||
/* Walk the string one encoded character at a time: the lead byte gives the
   sequence length (via utf8_char_length), and every following byte of the
   sequence must have its first zero bit at position 1 (10xxxxxx).
   Returns false for NULL, true for an empty string. */
bool hunicode_cchar_string_is_utf8(const char *str)
{
    if(str==NULL)
    {
        return false;
    }
    const char *cursor=str;
    while(*cursor!='\0')
    {
        const size_t seq_len=utf8_char_length(cursor);
        if(seq_len==0)
        {
            /* invalid lead byte */
            return false;
        }
        /* The terminating NUL is not a continuation byte, so a truncated
           sequence fails here before the scan can run past the string end. */
        for(size_t k=1; k<seq_len; k++)
        {
            if(char_zero_bit(cursor[k])!=1)
            {
                return false;
            }
        }
        cursor+=seq_len;
    }
    return true;
}
|
||
/* Returns true when str is non-NULL and no byte before the terminating NUL
   has its high bit set (i.e. every byte is below 0x80). An empty string is ASCII. */
bool hunicode_cchar_string_is_ascii(const char *str)
{
    if(str==NULL)
    {
        return false;
    }
    for(const unsigned char *p=(const unsigned char *)str; *p!=0; p++)
    {
        if((*p&0x80u)!=0)
        {
            return false;
        }
    }
    return true;
}
|
||
/* Number of char elements before the terminating NUL; 0 for a NULL pointer. */
size_t hunicode_cchar_string_length(const char *str)
{
    if(str==NULL)
    {
        return 0;
    }
    const char *end=str;
    while(*end!='\0')
    {
        end++;
    }
    return (size_t)(end-str);
}
|
||
/* Number of wchar_t elements before the terminating zero; 0 for a NULL pointer. */
size_t hunicode_wchar_t_string_length(const wchar_t *str)
{
    if(str==NULL)
    {
        return 0;
    }
    const wchar_t *end=str;
    while(*end!='\0')
    {
        end++;
    }
    return (size_t)(end-str);
}
|
||
/* Number of hunicode_char_t elements before the terminating zero; 0 for a NULL pointer. */
size_t hunicode_char_string_length(const hunicode_char_t *str)
{
    size_t count=0;
    if(str==NULL)
    {
        return count;
    }
    for(const hunicode_char_t *p=str; *p!=0; p++)
    {
        count++;
    }
    return count;
}
|
||
/* Copy up to min(dest_length, src_length) elements from src to dest, converting
   each one with a plain cast. Does nothing when either pointer is NULL or
   either length is zero. No terminator is appended. */
void hunicode_char_from_wchar(hunicode_char_t *dest,size_t dest_length,const wchar_t *src,size_t src_length)
{
    if(dest!=NULL && dest_length!=0 && src!=NULL && src_length!=0)
    {
        const size_t count=(src_length<dest_length)?src_length:dest_length;
        for(size_t n=0; n<count; n++)
        {
            dest[n]=(hunicode_char_t)src[n];
        }
    }
}
|
||
/* Convert a zero-terminated wchar_t string into dest.
   A terminating zero is written only when the source length is smaller than
   dest_length; if the source does not fit, the output is truncated and left
   without a terminator. */
void hunicode_char_from_wchar_string(hunicode_char_t *dest,size_t dest_length,const wchar_t *src)
{
    const size_t src_chars=hunicode_wchar_t_string_length(src);
    hunicode_char_from_wchar(dest,dest_length,src,src_chars);
    if(dest==NULL || src_chars>=dest_length)
    {
        return;
    }
    dest[src_chars]=0;
}
|
||
/* Copy up to min(dest_length, src_length) elements from src to dest, converting
   each one with a plain cast to wchar_t. Does nothing when either pointer is
   NULL or either length is zero. No terminator is appended. */
void hunicode_char_to_wchar(wchar_t *dest,size_t dest_length,const hunicode_char_t *src,size_t src_length)
{
    if(dest!=NULL && dest_length!=0 && src!=NULL && src_length!=0)
    {
        const size_t count=(src_length<dest_length)?src_length:dest_length;
        for(size_t n=0; n<count; n++)
        {
            dest[n]=(wchar_t)src[n];
        }
    }
}
|
||
/* Convert a zero-terminated hunicode_char_t string into dest.
   A terminating zero is written only when the source length is smaller than
   dest_length; if the source does not fit, the output is truncated and left
   without a terminator. */
void hunicode_char_string_to_wchar(wchar_t *dest,size_t dest_length,const hunicode_char_t *src)
{
    const size_t src_chars=hunicode_char_string_length(src);
    hunicode_char_to_wchar(dest,dest_length,src,src_chars);
    if(dest==NULL || src_chars>=dest_length)
    {
        return;
    }
    dest[src_chars]=0;
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
/*************************************************************** | ||
* Name: hunicode.h | ||
* Purpose: 声明hunicode接口 | ||
* Author: HYH (hyhsystem.cn) | ||
* Created: 2024-08-21 | ||
* Copyright: HYH (hyhsystem.cn) | ||
* License: MIT | ||
**************************************************************/ | ||
#ifndef __HUNICODE_H_INCLUDED__ | ||
#define __HUNICODE_H_INCLUDED__ | ||
#include "stdbool.h" | ||
#include "stdint.h" | ||
#include "stdlib.h" | ||
#include "limits.h" | ||
/* | ||
本文件用于处理Unicode的相关辅助函数。 | ||
*/ | ||
|
||
#ifdef __cplusplus | ||
extern "C" | ||
{ | ||
#endif // __cplusplus | ||
|
||
/* When WCHAR_MAX exceeds USHRT_MAX, define HUNICODE_USE_WCHAR_T to 1
   unless the build has already defined it. */
#if WCHAR_MAX > USHRT_MAX | ||
#ifndef HUNICODE_USE_WCHAR_T | ||
#define HUNICODE_USE_WCHAR_T 1 | ||
#endif // HUNICODE_USE_WCHAR_T
#endif | ||
|
||
/* hunicode_char_t is the element type of hunicode strings: wchar_t when
   HUNICODE_USE_WCHAR_T is defined (to any value, since #ifdef is used),
   otherwise int. */
#ifdef HUNICODE_USE_WCHAR_T | ||
#define hunicode_char_t wchar_t | ||
#else | ||
#define hunicode_char_t int | ||
#endif | ||
|
||
/** \brief Check whether a string contains only ASCII bytes.
 *
 * \param str const char* NUL-terminated string; may be NULL.
 * \return bool true when str is non-NULL and every byte before the terminator is below 0x80 (an empty string qualifies); false otherwise.
 *
 */ | ||
bool hunicode_cchar_string_is_ascii(const char *str); | ||
|
||
/** \brief Check whether a string is structurally valid UTF-8.
 *
 * Each character's lead byte must announce a length, and the following
 * bytes of that character must be continuation bytes (10xxxxxx).
 *
 * \param str const char* NUL-terminated string; may be NULL.
 * \return bool true when str is non-NULL and every character passes the check (an empty string qualifies); false otherwise.
 *
 */ | ||
bool hunicode_cchar_string_is_utf8(const char *str); | ||
|
||
/** \brief Get the length of a char string.
 *
 * \param str const char* NUL-terminated string; may be NULL.
 * \return size_t Number of char elements before the terminator; 0 when str is NULL.
 *
 */ | ||
size_t hunicode_cchar_string_length(const char *str); | ||
|
||
/** \brief Get the length of a wchar_t string.
 *
 * \param str const wchar_t* Zero-terminated string; may be NULL.
 * \return size_t Number of wchar_t elements before the terminator; 0 when str is NULL.
 *
 */ | ||
size_t hunicode_wchar_t_string_length(const wchar_t *str); | ||
|
||
/** \brief Get the length of a hunicode_char_t string.
 *
 * \param str const hunicode_char_t* Zero-terminated string; may be NULL.
 * \return size_t Number of hunicode_char_t elements before the terminator; 0 when str is NULL.
 *
 */ | ||
size_t hunicode_char_string_length(const hunicode_char_t *str); | ||
|
||
/** \brief Convert a wchar_t buffer to hunicode_char_t.
 *
 * Copies at most min(dest_length, src_length) elements, casting each one.
 * Nothing is written when either pointer is NULL or either length is 0.
 * No terminator is appended.
 *
 * \param dest hunicode_char_t* Destination buffer.
 * \param dest_length size_t Capacity of the destination buffer, in elements.
 * \param src const wchar_t* Source buffer.
 * \param src_length size_t Number of elements in the source buffer.
 *
 */ | ||
void hunicode_char_from_wchar(hunicode_char_t *dest,size_t dest_length,const wchar_t *src,size_t src_length); | ||
|
||
/** \brief Convert a zero-terminated wchar_t string to hunicode_char_t.
 *
 * The output is zero-terminated only when the source length is smaller than
 * dest_length; otherwise it is truncated and left without a terminator.
 *
 * \param dest hunicode_char_t* Destination buffer.
 * \param dest_length size_t Capacity of the destination buffer, in elements.
 * \param src const wchar_t* Zero-terminated source string.
 *
 */ | ||
void hunicode_char_from_wchar_string(hunicode_char_t *dest,size_t dest_length,const wchar_t *src); | ||
|
||
/** \brief Convert a hunicode_char_t buffer to wchar_t.
 *
 * Copies at most min(dest_length, src_length) elements, casting each one.
 * Nothing is written when either pointer is NULL or either length is 0.
 * No terminator is appended.
 *
 * \param dest wchar_t* Destination buffer.
 * \param dest_length size_t Capacity of the destination buffer, in elements.
 * \param src const hunicode_char_t* Source buffer.
 * \param src_length size_t Number of elements in the source buffer.
 *
 */ | ||
void hunicode_char_to_wchar(wchar_t *dest,size_t dest_length,const hunicode_char_t *src,size_t src_length); | ||
|
||
/** \brief Convert a zero-terminated hunicode_char_t string to wchar_t.
 *
 * The output is zero-terminated only when the source length is smaller than
 * dest_length; otherwise it is truncated and left without a terminator.
 *
 * \param dest wchar_t* Destination buffer.
 * \param dest_length size_t Capacity of the destination buffer, in elements.
 * \param src const hunicode_char_t* Zero-terminated source string.
 *
 */ | ||
void hunicode_char_string_to_wchar(wchar_t *dest,size_t dest_length,const hunicode_char_t *src); | ||
|
||
#ifdef __cplusplus | ||
} | ||
#endif // __cplusplus | ||
|
||
|
||
#endif // __HUNICODE_H_INCLUDED__ |