-
Notifications
You must be signed in to change notification settings - Fork 8
/
tools.c
2004 lines (1837 loc) · 59.1 KB
/
tools.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Utility routines; platform-specific parts are selected with _MSC_VER (Windows) versus everything else (POSIX/GCC).
#ifdef _MSC_VER
#ifndef _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_WARNINGS
#endif
#endif
#ifdef _MSC_VER
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif // !WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <time.h>
#include <io.h>
//# if defined(WIN32)
//# define snprintf _snprintf
//# endif
#else //__GNUC__
#include <errno.h>
#include <stdarg.h>
#include <unistd.h>
#include <sys/types.h>
#include <dirent.h>
#include <sys/stat.h>
#include <time.h>
#include <sys/time.h>
#include <iconv.h>
#endif
#include "tools.h"
//#include "membuf.h"
#include <stdlib.h>
#include <stdio.h>
#include <memory.h>
#include <string.h>
#include <limits.h>
#include <stdarg.h>
//-----------------------------------------------------------------------------------membuf c-str win/linux
#pragma region membuf c-str
#include <assert.h>
/*
 * Initialise a membuf_t.
 *
 * buf                  buffer descriptor to set up; every field is zeroed first.
 * initial_buffer_size  number of zero-filled bytes to pre-allocate (0 = allocate nothing).
 *
 * If the allocation fails the buffer is left empty (data == NULL, buffer_size == 0)
 * so that later calls never believe they own capacity that does not exist.
 */
void membuf_init(membuf_t* buf, size_t initial_buffer_size) {
	memset(buf, 0, sizeof(membuf_t));
	if (initial_buffer_size > 0) {
		buf->data = (uchar*)calloc(1, initial_buffer_size);
		// Only advertise the capacity when calloc actually delivered it.
		if (buf->data)
			buf->buffer_size = initial_buffer_size;
	}
}
// Release the storage owned by buf and reset every field of the descriptor to zero.
void membuf_uninit(membuf_t* buf) {
	// free(NULL) is a no-op, so a never-allocated buffer needs no special case.
	free(buf->data);
	memset(buf, 0, sizeof(*buf));
}
/*
 * Discard the buffer's content: size becomes 0 and the whole allocation is
 * overwritten with zero bytes. Nothing happens when the buffer holds no data.
 *
 * maxSize  when > 1 and smaller than the current capacity, the allocation is
 *          shrunk to this many bytes.
 *
 * A failed shrink is harmless: the larger allocation is simply kept.
 */
void membuf_clear(membuf_t* buf, size_t maxSize) {
	if (buf->data && buf->size) {
		if (maxSize > 1 && buf->buffer_size > maxSize) {
			uchar* p = (uchar*)realloc(buf->data, maxSize);
			// On failure realloc leaves the old block untouched, so keep using it.
			if (p) {
				buf->data = p;
				buf->buffer_size = maxSize;
			}
		}
		buf->size = 0;
		memset(buf->data, 0, buf->buffer_size);
	}
}
/*
 * Make sure at least extra_size bytes are available after the current data.
 *
 * The capacity grows by doubling until it covers size + extra_size; the newly
 * exposed region (everything past buf->size) is zero-filled.
 *
 * The function cannot report failure (it returns void and callers write into
 * the buffer right after it), so running out of memory or overflowing size_t
 * terminates the process instead of continuing with a NULL/short buffer.
 */
void membuf_reserve(membuf_t* buf, size_t extra_size) {
	if (extra_size > buf->buffer_size - buf->size) {
		const size_t half_max = (size_t)-1 / 2;
		size_t new_data_size = buf->size + extra_size;
		size_t new_buffer_size;
		uchar* p;

		// size + extra_size wrapped around: the request cannot be satisfied.
		assert(new_data_size >= buf->size);
		if (new_data_size < buf->size)
			abort();

		// Start from the request (empty buffer) or twice the current capacity.
		if (buf->buffer_size == 0)
			new_buffer_size = extra_size;
		else if (buf->buffer_size > half_max)
			new_buffer_size = new_data_size;
		else
			new_buffer_size = buf->buffer_size << 1;

		// Keep doubling, but never let the shift overflow (which would loop forever).
		while (new_buffer_size < new_data_size) {
			if (new_buffer_size > half_max) {
				new_buffer_size = new_data_size;
				break;
			}
			new_buffer_size <<= 1;
		}

		p = (uchar*)realloc(buf->data, new_buffer_size);
		assert(p);
		if (!p)
			abort();
		buf->data = p;
		memset(buf->data + buf->size, 0, new_buffer_size - buf->size);
		buf->buffer_size = new_buffer_size;
	}
}
/*
 * Resize the allocation to exactly size + 4 bytes (shrinking or growing as
 * needed) and zero the 4 bytes that follow the data, so the content can be
 * handed out as a NUL-terminated string.
 *
 * A failed shrink keeps the larger block (it already has room for the
 * padding). A failed grow cannot be recovered from here and aborts, instead of
 * writing the padding past the end of the old block.
 */
void membuf_trunc(membuf_t* buf) {
	const size_t wanted = buf->size + 4;
	uchar* p;

	if (buf->buffer_size == wanted)
		return;

	p = (uchar*)realloc(buf->data, wanted);
	if (p == NULL) {
		if (buf->buffer_size < wanted) {
			assert(p);
			abort();
		}
		// Shrink failed: the old, larger block is still valid and big enough.
		memset(buf->data + buf->size, 0, 4);
		return;
	}
	buf->data = p;
	memset(buf->data + buf->size, 0, 4);
	buf->buffer_size = wanted;
}
/*
 * Append a C string (without its terminating NUL) to the buffer.
 * Returns the number of bytes appended; a NULL str appends nothing and returns 0.
 */
size_t membuf_append(membuf_t* buf, const char* str) {
	size_t len = 0;
	if (str != NULL) {
		len = strlen(str);
		membuf_reserve(buf, len);
		memmove(buf->data + buf->size, str, len);
		buf->size = buf->size + len;
	}
	return len;
}
/*
 * Append size raw bytes taken from data to the buffer.
 * data must be non-NULL and size non-zero (asserted). Returns size.
 */
size_t membuf_append_data(membuf_t* buf, const void* data, size_t size) {
	uchar* tail;
	assert(data && size > 0);
	membuf_reserve(buf, size);
	// Take the write position only after reserving: the block may have moved.
	tail = buf->data + buf->size;
	memmove(tail, data, size);
	buf->size = buf->size + size;
	return size;
}
/*
 * Append printf-style formatted text to the buffer.
 *
 * The text is measured with a first vsnprintf pass, room for it plus the
 * terminating NUL is reserved, and a second pass writes it in place. The NUL
 * is written but not counted in buf->size.
 *
 * Returns the number of bytes appended, or 0 when formatting fails (in which
 * case the buffer is left untouched).
 */
size_t membuf_append_format(membuf_t* buf, const char* fmt, ...) {
	va_list ap;
	int needed;
	size_t room;

	assert(fmt);

	va_start(ap, fmt);
	needed = vsnprintf(NULL, 0, fmt, ap);
	va_end(ap);
	// vsnprintf reports an encoding error with a negative value; converting
	// that to size_t would wrap and corrupt buf->size.
	if (needed < 0)
		return 0;

	room = (size_t)needed + 1;
	membuf_reserve(buf, room);

	va_start(ap, fmt);
	vsnprintf((char*)(buf->data + buf->size), room, fmt, ap);
	va_end(ap);

	buf->size += (size_t)needed;
	return (size_t)needed;
}
/*
 * Insert size bytes from data at position offset, shifting the existing tail
 * towards the end of the buffer.
 *
 * offset must be strictly inside the current content (asserted).
 */
void membuf_insert(membuf_t* buf, size_t offset, void* data, size_t size) {
	assert(offset < buf->size);
	membuf_reserve(buf, size);
	// Source and destination of the tail shift overlap whenever the tail is
	// longer than size, so this has to be memmove, not memcpy.
	memmove(buf->data + offset + size, buf->data + offset, buf->size - offset);
	memcpy(buf->data + offset, data, size);
	buf->size += size;
}
/*
 * Remove size bytes starting at offset. Bytes behind the removed range are
 * moved down; when the range reaches the end of the content, the content is
 * simply cut at offset. The allocation itself is not changed and the freed
 * region is not cleared, only one terminating zero byte is written after the
 * new end when there is room for it.
 *
 * offset must be strictly inside the current content (asserted; ignored in
 * release builds instead of growing the content).
 */
void membuf_move(membuf_t* buf, size_t offset, size_t size) {
	assert(offset < buf->size);
	if (offset >= buf->size)
		return;

	// Compared as "size >= remaining" so that offset + size cannot overflow.
	if (size >= buf->size - offset) {
		buf->size = offset;
	}
	else {
		memmove(buf->data + offset, buf->data + offset + size, buf->size - offset - size);
		buf->size -= size;
	}

	// data[size] is only inside the allocation when capacity exceeds size.
	if (buf->buffer_size > buf->size)
		buf->data[buf->size] = 0;
}
#pragma endregion
//-----------------------------------------------------------------------------------文件/文件夹检测 win/linux
#pragma region 文件/文件夹检测
#ifdef _MSC_VER
#include <direct.h>
// Return the current working directory (no trailing '\').
// The result lives in a static buffer that is overwritten by the next call.
char* getWorkPath() {
	static char workDir[260] = { 0 };
	// One byte is held back so the buffer always ends with a NUL.
	GetCurrentDirectory(sizeof(workDir) - 1, workDir);
	return workDir;
}
/*
 * Return the directory that contains the running executable (no trailing '\').
 * The result lives in a static buffer that is overwritten by the next call;
 * it is an empty string when the module path cannot be obtained.
 */
char* getProcPath() {
	static char CurPath[260] = { 0 };
	size_t len;
	size_t i;

	CurPath[0] = 0;
	GetModuleFileName(GetModuleHandle(NULL), CurPath, 259);

	// Nothing to strip; also avoids len - 1 wrapping around below.
	len = strlen(CurPath);
	if (len == 0)
		return CurPath;

	// Erase the file name, i.e. everything behind the last '\'.
	for (i = len - 1; i > 0 && CurPath[i] != '\\'; --i) {
		CurPath[i] = 0;
	}
	// Drop the separator itself unless it belongs to a drive root such as "C:\".
	if (i > 2 && CurPath[i] == '\\')
		CurPath[i] = 0;
	return CurPath;
}
/*
 * Create directory path, creating missing parent directories first.
 *
 * path  directory to create; '\' and '/' are both accepted as separators and
 *       one trailing separator is ignored when looking for the parent.
 * mod   unused on Windows (kept for signature parity with the POSIX build).
 *
 * Returns the result of _mkdir(path); failures while creating parents are
 * not reported separately.
 */
int makeDir(const char * path, int mod) {
	char pth[513];
	char *p;
	char *q;

	strncpy(pth, path, 512);
	pth[512] = 0;	// strncpy does not terminate when path has 512+ characters

	// Last separator of either kind.
	p = strrchr(pth, '\\');
	q = strrchr(pth, '/');
	if (q && (!p || q > p))
		p = q;

	if (p) {
		// A trailing separator is not a path component: cut it and search again.
		if (p[1] == 0)
			*p = 0;
		p = strrchr(pth, '\\');
		q = strrchr(pth, '/');
		if (q && (!p || q > p))
			p = q;
		if (p) {
			*p = 0;	// pth is now the parent directory
			if (pth[0] && !isExist(pth))
				makeDir(pth, mod);
		}
	}
	return _mkdir(path);
}
/*
 * Look up lpPath with _findfirst and return the entry that was found.
 * When nothing matches, the returned structure is all zeroes (name[0] == 0).
 */
inline struct _finddata_t GetFileInfo(const char* lpPath) {
	struct _finddata_t fileinfo;
	intptr_t hFind;

	memset(&fileinfo, 0, sizeof(struct _finddata_t));
	hFind = _findfirst(lpPath, &fileinfo);
	// _findfirst returns -1 when there is no match; that value is not a
	// handle and must not be passed to _findclose.
	if (hFind != -1)
		_findclose(hFind);
	else
		memset(&fileinfo, 0, sizeof(struct _finddata_t));
	return fileinfo;
}
/*
 * Does path exist?  0: no   1: yes, a file   2: yes, a directory
 *
 * Existence is decided by the entry name alone, the same way isFile() and
 * isDir() do it: a plain file may legitimately carry an attribute value of 0
 * (_A_NORMAL), so the attribute field cannot be used as an "exists" flag.
 */
char isExist(const char* path) {
	struct _finddata_t fd = GetFileInfo(path);
	if (!fd.name[0])
		return 0;
	return (fd.attrib & FILE_ATTRIBUTE_DIRECTORY) ? 2 : 1;
}
//是否文件(1:是文件 0:非文件/不存在)
char isFile(const char* path) {
struct _finddata_t fd = GetFileInfo(path);
return (fd.name[0] && !(fd.attrib & FILE_ATTRIBUTE_DIRECTORY));
}
//是否目录(1:是目录 0;非目录/不存在)
char isDir(const char* path) {
struct _finddata_t fd = GetFileInfo(path);
return (fd.name[0] && (fd.attrib & FILE_ATTRIBUTE_DIRECTORY));
}
/*
 * Build a JSON-like listing of directory fullpath. The caller must free() the
 * returned string.
 *
 * fullpath  directory to enumerate on disk
 * reqPath   path echoed back in the "path" member
 *
 * Each entry is emitted as
 *   {"name":"sub/","mtime":"YYYY-MM-DD hh:mm:ss","size":"-","type":"D"}   (directory)
 *   {"name":"file","mtime":"YYYY-MM-DD hh:mm:ss","size":123,"type":"F"}   (file)
 * Directories whose name starts with '.' are skipped. "total" counts files only.
 * Names are written as-is (no JSON escaping, no code-page conversion).
 */
char* listDir(const char* fullpath, const char* reqPath) {
	int fnum = 0;
	int n;
	membuf_t buf;
	struct _finddatai64_t fdt;
	intptr_t hFind = -1;
	char szFind[256];

	membuf_init(&buf, 2048);
	membuf_append_format(&buf, "{\"path\":\"%s\",\"files\":[\r\n", reqPath);

	// A truncated search pattern would enumerate the wrong directory, so an
	// over-long path is treated like a directory without entries.
	n = snprintf(szFind, sizeof(szFind), "%s\\*", fullpath);
	if (n > 0 && (size_t)n < sizeof(szFind))
		hFind = _findfirsti64(szFind, &fdt);

	if (hFind != -1) {
		do {
			struct tm zeroTime = { 0 };
			struct tm *t;

			if ((fdt.attrib & FILE_ATTRIBUTE_DIRECTORY) && strncmp(fdt.name, ".", 1) == 0)
				continue;	// jumps to the _findnexti64 call in the loop condition

			// Last-modified time; localtime can fail for out-of-range
			// timestamps, in which case an all-zero time is printed.
			t = localtime(&fdt.time_write);
			if (t == NULL)
				t = &zeroTime;

			if (fdt.attrib & FILE_ATTRIBUTE_DIRECTORY) {
				membuf_append_format(&buf, "{\"name\":\"%s/\",\"mtime\":\"%d-%02d-%02d %02d:%02d:%02d\",\"size\":\"-\",\"type\":\"D\"},\n", fdt.name, t->tm_year + 1900, t->tm_mon + 1, t->tm_mday, t->tm_hour, t->tm_min, t->tm_sec);
			}
			else {
				fnum++;
				membuf_append_format(&buf, "{\"name\":\"%s\",\"mtime\":\"%d-%02d-%02d %02d:%02d:%02d\",\"size\":%lld,\"type\":\"F\"},\n", fdt.name, t->tm_year + 1900, t->tm_mon + 1, t->tm_mday, t->tm_hour, t->tm_min, t->tm_sec, fdt.size);
			}
		} while (_findnexti64(hFind, &fdt) == 0);
		// Only a real handle may be closed; -1 means the search never started.
		_findclose(hFind);
	}

	// Drop the last two bytes: ",\n" of the final entry, or "\r\n" of the
	// header when no entry was written.
	buf.data[--buf.size] = 0; buf.data[--buf.size] = 0;
	membuf_append_format(&buf, "],total:%d}", fnum);
	membuf_trunc(&buf);
	return (char*)buf.data;
}
#else //_GNUC_
/*
 * Return the current working directory (no trailing '/').
 * The result lives in a static buffer that is overwritten by the next call;
 * it is an empty string when the directory cannot be determined (for example
 * when its path does not fit into the buffer).
 */
char* getWorkPath() {
	static char CurPath[260] = { 0 };
	// The buffer content is unspecified after a failed getcwd, so reset it.
	if (getcwd(CurPath, 259) == NULL)
		CurPath[0] = 0;
	return CurPath;
}
/*
 * Create directory path with permission bits mod, creating missing parent
 * directories first.
 *
 * '\' and '/' are both accepted as separators and one trailing separator is
 * ignored when looking for the parent.
 *
 * Returns the result of mkdir(path, mod); failures while creating parents are
 * not reported separately.
 */
int makeDir(const char * path, int mod) {
	char pth[513];
	char *p;
	char *q;

	strncpy(pth, path, 512);
	pth[512] = 0;	// strncpy does not terminate when path has 512+ characters

	// Last separator of either kind.
	p = strrchr(pth, '\\');
	q = strrchr(pth, '/');
	if (q && (!p || q > p))
		p = q;

	if (p) {
		// A trailing separator is not a path component: cut it and search again.
		if (p[1] == 0)
			*p = 0;
		p = strrchr(pth, '\\');
		q = strrchr(pth, '/');
		if (q && (!p || q > p))
			p = q;
		if (p) {
			*p = 0;	// pth is now the parent directory ("" for a child of "/")
			if (pth[0] && !isExist(pth))
				makeDir(pth, mod);
		}
	}
	return mkdir(path, mod);
}
/*
 * Return the directory that contains the running executable (no trailing '/'),
 * resolved through the /proc/self/exe link.
 * The result lives in a static buffer that is overwritten by the next call;
 * it is an empty string when the link cannot be read.
 */
char* getProcPath() {
	static char CurPath[260] = { 0 };
	ssize_t cnt = readlink("/proc/self/exe", CurPath, 259);
	ssize_t i;

	if (cnt <= 0 || cnt >= 260) {
		CurPath[0] = 0;
		return CurPath;
	}
	// readlink does not append a terminator.
	CurPath[cnt] = 0;

	// Erase the file name, i.e. everything behind the last '/'.
	for (i = cnt - 1; i > 0 && CurPath[i] != '/'; --i) {
		CurPath[i] = 0;
	}
	// Drop the separator itself, except very close to the start of the path.
	if (i > 2 && CurPath[i] == '/')
		CurPath[i] = 0;
	return CurPath;
}
/*
 * Does path exist?  0: no (or path is NULL)   1: yes, not a directory   2: yes, a directory
 *
 * stat() follows symbolic links, so a link is classified by what it points
 * to and a dangling link counts as "does not exist".
 */
char isExist(const char* path) {
	struct stat info;
	// The stat result itself is the existence test; info is only read on success.
	if (path == NULL || stat(path, &info) != 0)
		return 0;
	return S_ISDIR(info.st_mode) ? 2 : 1;
}
/*
 * Is path an existing directory?  1: yes   0: no, it does not exist, or path is NULL
 *
 * stat() follows symbolic links, so a link to a directory counts as a directory.
 */
char isDir(const char* path) {
	struct stat info;
	// info is only meaningful when stat succeeded.
	if (path == NULL || stat(path, &info) != 0)
		return 0;
	return S_ISDIR(info.st_mode) ? 1 : 0;
}
/*
 * Is path an existing regular file?  1: yes   0: no, it does not exist, or path is NULL
 *
 * stat() follows symbolic links, so a link to a regular file counts as a file.
 */
char isFile(const char* path) {
	struct stat info;
	// info is only meaningful when stat succeeded.
	if (path == NULL || stat(path, &info) != 0)
		return 0;
	return S_ISREG(info.st_mode) ? 1 : 0;
}
/*
 * Build a JSON-like listing of directory fullpath. The caller must free() the
 * returned string.
 *
 * fullpath  directory to enumerate on disk
 * reqPath   path echoed back in the "path" member
 *
 * Each entry is emitted as
 *   {"name":"sub/","mtime":"YYYY-MM-DD hh:mm:ss","size":"-","type":"D"}   (directory)
 *   {"name":"file","mtime":"YYYY-MM-DD hh:mm:ss","size":123,"type":"F"}   (anything else)
 * Directories whose name starts with '.' are skipped, and so are entries that
 * cannot be stat'ed or whose full path does not fit the path buffer.
 * "total" counts non-directory entries only. Names are written without JSON escaping.
 */
char* listDir(const char* fullpath, const char* reqPath) {
	int fnum = 0;
	char tmp[1024];
	DIR *dp;
	struct dirent *fileInfo;
	struct stat statbuf;
	membuf_t buf;

	membuf_init(&buf, 2048);
	membuf_append_format(&buf, "{\"path\":\"%s\",\"files\":[\r\n", reqPath);

	if ((dp = opendir(fullpath)) != NULL) {
		while ((fileInfo = readdir(dp)) != NULL) {
			struct tm zeroTime = { 0 };
			struct tm *mtime;
			int n;

			// stat needs a path that is valid from the working directory,
			// hence the directory prefix. A truncated path would describe a
			// different file, so such entries are skipped.
			n = snprintf(tmp, sizeof(tmp), "%s/%s", fullpath, fileInfo->d_name);
			if (n < 0 || (size_t)n >= sizeof(tmp))
				continue;
			// Without a successful stat there is no type, size or time to report.
			if (stat(tmp, &statbuf) != 0)
				continue;

			mtime = localtime(&statbuf.st_mtime);
			if (mtime == NULL)
				mtime = &zeroTime;

			if (S_ISDIR(statbuf.st_mode)) {
				if (strncmp(fileInfo->d_name, ".", 1) == 0)
					continue;
				membuf_append_format(&buf, "{\"name\":\"%s/\",\"mtime\":\"%d-%02d-%02d %02d:%02d:%02d\",\"size\":\"-\",\"type\":\"D\"},\n", fileInfo->d_name, (1900 + mtime->tm_year), (1 + mtime->tm_mon), mtime->tm_mday, mtime->tm_hour, mtime->tm_min, mtime->tm_sec);
			}
			else {
				fnum++;
				// off_t is not necessarily long; print it through long long.
				membuf_append_format(&buf, "{\"name\":\"%s\",\"mtime\":\"%d-%02d-%02d %02d:%02d:%02d\",\"size\":%lld,\"type\":\"F\"},\n", fileInfo->d_name, (1900 + mtime->tm_year), (1 + mtime->tm_mon), mtime->tm_mday, mtime->tm_hour, mtime->tm_min, mtime->tm_sec, (long long)statbuf.st_size);
			}
		}
		closedir(dp);
	}

	// Drop the last two bytes: ",\n" of the final entry, or "\r\n" of the
	// header when no entry was written.
	buf.data[--buf.size] = 0; buf.data[--buf.size] = 0;
	membuf_append_format(&buf, "],total:%d}", fnum);
	membuf_trunc(&buf);
	return (char*)buf.data;
}
#endif
#pragma endregion
//-----------------------------------------------------------------------------------编码转换 win/linux
#pragma region 编码转换
/*****************************************************************************
 * Encode one Unicode code value (UCS-2 / UCS-4 range) as UTF-8.
 *
 * Parameters:
 *   unic     the code value to encode
 *   pOutput  destination for the UTF-8 bytes
 *   outSize  size of pOutput; must be at least 6 (asserted)
 *
 * Returns:
 *   the number of bytes written (1..6), or 0 when unic is above 0x7FFFFFFF.
 *
 * Layout (lead byte, then 10xxxxxx continuation bytes):
 *   U-00000000 - U-0000007F: 0xxxxxxx
 *   U-00000080 - U-000007FF: 110xxxxx + 1
 *   U-00000800 - U-0000FFFF: 1110xxxx + 2
 *   U-00010000 - U-001FFFFF: 11110xxx + 3
 *   U-00200000 - U-03FFFFFF: 111110xx + 4
 *   U-04000000 - U-7FFFFFFF: 1111110x + 5
 ****************************************************************************/
int enc_unicode_to_utf8_one(size_t unic, uchar *pOutput, int outSize) {
	int count;
	int k;
	uchar lead;

	assert(pOutput != NULL);
	assert(outSize >= 6);

	if (unic <= 0x0000007F) {
		*pOutput = (unic & 0x7F);
		return 1;
	}

	// Pick the sequence length and the marker bits of the lead byte.
	if (unic <= 0x000007FF) { count = 2; lead = 0xC0; }
	else if (unic <= 0x0000FFFF) { count = 3; lead = 0xE0; }
	else if (unic <= 0x001FFFFF) { count = 4; lead = 0xF0; }
	else if (unic <= 0x03FFFFFF) { count = 5; lead = 0xF8; }
	else if (unic <= 0x7FFFFFFF) { count = 6; lead = 0xFC; }
	else return 0;

	// Fill the continuation bytes from the back, six payload bits each.
	for (k = count - 1; k > 0; --k) {
		pOutput[k] = (uchar)((unic & 0x3F) | 0x80);
		unic >>= 6;
	}
	// Whatever is left fits into the free bits of the lead byte.
	pOutput[0] = (uchar)(unic | lead);
	return count;
}
/*
 * Length in bytes of the UTF-8 sequence introduced by lead byte pInput.
 *
 *   0xxxxxxx (0x00-0x7F)  ->  1
 *   10xxxxxx (0x80-0xBF)  -> -1   (continuation byte, cannot start a sequence)
 *   110xxxxx (0xC0-0xDF)  ->  2
 *   1110xxxx (0xE0-0xEF)  ->  3
 *   11110xxx (0xF0-0xF7)  ->  4
 *   111110xx (0xF8-0xFB)  ->  5
 *   0xFC and above        ->  6
 */
int enc_get_utf8_size(const unsigned char pInput) {
	// Checked from the top down, so each test only needs its lower bound.
	if (pInput >= 0xFC) return 6;
	if (pInput >= 0xF8) return 5;
	if (pInput >= 0xF0) return 4;
	if (pInput >= 0xE0) return 3;
	if (pInput >= 0xC0) return 2;
	if (pInput >= 0x80) return -1;
	return 1;
}
/*****************************************************************************
 * Decode one UTF-8 sequence into a Unicode code value.
 *
 * Parameters:
 *   pInput  start of the UTF-8 sequence
 *   Unic    receives the code value as little-endian bytes (low byte first).
 *           Only as many bytes as the sequence length calls for are written:
 *           1 byte for a 1-byte sequence, 2 for 2- and 3-byte sequences,
 *           3 for 4-byte sequences, 4 for 5- and 6-byte sequences. The
 *           remaining bytes of the destination are left as they were, except
 *           that Unic[0] is always cleared first.
 *
 * Returns:
 *   the number of UTF-8 bytes consumed (1..6), or 0 when the lead byte is a
 *   continuation byte or one of the following bytes is not a 10xxxxxx byte.
 *
 * The input is read one byte at a time and reading stops at the first
 * invalid continuation byte.
 ****************************************************************************/
int enc_utf8_to_unicode_one(const uchar* pInput, uchar *Unic) {
	// Payload bits of the lead byte, indexed by sequence length.
	static const uchar lead_mask[7] = { 0x00, 0x7F, 0x1F, 0x0F, 0x07, 0x03, 0x01 };
	// Number of output bytes written, indexed by sequence length.
	static const int out_bytes[7] = { 0, 1, 2, 2, 3, 4, 4 };
	unsigned long cp;
	int utfbytes;
	int k;

	assert(pInput != NULL && Unic != NULL);

	*Unic = 0x0;
	utfbytes = enc_get_utf8_size(*pInput);
	if (utfbytes < 1 || utfbytes > 6)
		return 0;

	// Assemble the code value with unsigned arithmetic: lead-byte payload
	// first, then six bits from every continuation byte.
	cp = (unsigned long)(*pInput & lead_mask[utfbytes]);
	for (k = 1; k < utfbytes; k++) {
		uchar b = pInput[k];
		if ((b & 0xC0) != 0x80)
			return 0;
		cp = (cp << 6) | (unsigned long)(b & 0x3F);
	}

	// Store low byte first.
	for (k = 0; k < out_bytes[utfbytes]; k++) {
		Unic[k] = (uchar)(cp & 0xFF);
		cp >>= 8;
	}
	return utfbytes;
}
/*
 * Convert a buffer of Unicode code values to UTF-8. The caller must free()
 * the returned buffer.
 *
 * data  input; read as consecutive uint-sized code values
 * len   in:  size of data in bytes
 *       out: number of UTF-8 bytes produced (the result is followed by zero padding)
 *
 * Conversion stops at the first value that cannot be encoded. A trailing
 * partial unit is ignored.
 *
 * NOTE(review): the input is consumed in steps of sizeof(uint) because that is
 * the width each value is read with; confirm callers do not pass 2-byte units.
 */
char* enc_u2u8(const char* data, uint* len) {
	const size_t total = *len;
	size_t t, i;
	membuf_t buf;

	membuf_init(&buf, 128);
	for (i = 0; total - i >= sizeof(uint) && i < total; i += sizeof(uint)) {
		uint unit;
		// memcpy instead of a pointer cast: data + i need not be aligned.
		memcpy(&unit, data + i, sizeof(unit));
		if (buf.buffer_size - buf.size < 7)
			membuf_reserve(&buf, 7);
		t = enc_unicode_to_utf8_one(unit, (buf.data + buf.size), 7);
		if (t == 0) break;
		buf.size += t;
	}
	membuf_trunc(&buf);
	*len = buf.size;
	return (char*)buf.data;
}
/*
 * Convert UTF-8 text to 2-byte little-endian code units. The caller must
 * free() the returned buffer.
 *
 * data  UTF-8 input
 * len   in:  size of data in bytes
 *       out: number of bytes produced (2 per decoded character; the result is
 *            followed by zero padding)
 *
 * Every character contributes exactly two bytes, so code values above 0xFFFF
 * are cut down to their low 16 bits. Conversion stops at the first invalid
 * lead/continuation byte or when the last sequence is cut off by the end of
 * the input.
 */
char* enc_u82u(const char* data, uint* len) {
	const size_t total = *len;
	size_t t, i;
	membuf_t buf;

	membuf_init(&buf, 128);
	for (i = 0; i < total; i += t) {
		// Decode into a scratch cell so that bytes 2..3 of a long sequence
		// never spill into the slot of the following character.
		uchar unit[4] = { 0, 0, 0, 0 };
		int need = enc_get_utf8_size((uchar)data[i]);
		if (need < 1 || (size_t)need > total - i)
			break;
		t = enc_utf8_to_unicode_one((const uchar*)(data + i), unit);
		if (t == 0) break;
		membuf_append_data(&buf, unit, 2);
	}
	membuf_trunc(&buf);
	*len = buf.size;
	return (char*)buf.data;
}
#ifdef _MSC_VER
// ANSI code page text (CP_ACP; GB2312 per the original note) -> wide string.
// *wLen receives the number of wide characters including the terminator.
// The caller must free() the result.
wchar_t* GB2U(const char* pszGbs, uint* wLen) {
	wchar_t* wide;
	// Pass 1 (no output buffer): ask for the required length.
	*wLen = MultiByteToWideChar(CP_ACP, 0, pszGbs, -1, NULL, 0);
	wide = (wchar_t*)malloc(sizeof(wchar_t) * *wLen);
	// Pass 2: convert into the freshly sized buffer.
	MultiByteToWideChar(CP_ACP, 0, pszGbs, -1, wide, *wLen);
	return wide;
}
// Wide string -> UTF-8.
// *aLen receives the number of bytes including the terminator.
// The caller must free() the result.
char* U2U8(const wchar_t* wszUnicode, uint* aLen) {
	char* utf8;
	// Pass 1 (no output buffer): ask for the required length.
	*aLen = WideCharToMultiByte(CP_UTF8, 0, (PWSTR)wszUnicode, -1, NULL, 0, NULL, NULL);
	utf8 = (char*)malloc(sizeof(char) * *aLen);
	// Pass 2: convert into the freshly sized buffer.
	WideCharToMultiByte(CP_UTF8, 0, (PWSTR)wszUnicode, -1, utf8, *aLen, NULL, NULL);
	return utf8;
}
// UTF-8 -> wide string.
// *wLen receives the number of wide characters including the terminator.
// The caller must free() the result.
wchar_t* U82U(const char* szU8, uint* wLen) {
	wchar_t* wide;
	// Pass 1 (no output buffer): ask for the required length.
	*wLen = MultiByteToWideChar(CP_UTF8, 0, szU8, -1, NULL, 0);
	wide = (wchar_t*)malloc(sizeof(wchar_t) * *wLen);
	// Pass 2: convert into the freshly sized buffer.
	MultiByteToWideChar(CP_UTF8, 0, szU8, -1, wide, *wLen);
	return wide;
}
// Wide string -> ANSI code page text (CP_ACP; GB2312 per the original note).
// *aLen receives the number of bytes including the terminator.
// The caller must free() the result.
char* U2GB(const wchar_t* wszUnicode, uint* aLen) {
	char* ansi;
	// Pass 1 (no output buffer): ask for the required length.
	*aLen = WideCharToMultiByte(CP_ACP, 0, wszUnicode, -1, NULL, 0, NULL, NULL);
	ansi = (char*)malloc(sizeof(char) * *aLen);
	// Pass 2: convert into the freshly sized buffer.
	WideCharToMultiByte(CP_ACP, 0, wszUnicode, -1, ansi, *aLen, NULL, NULL);
	return ansi;
}
// ANSI code page text (CP_ACP; GB2312 per the original note) -> UTF-8, going
// through an intermediate wide string.
// *aLen receives the number of UTF-8 bytes including the terminator.
// The caller must free() the result.
char* GB2U8(const char* pszGbs, uint* aLen) {
	wchar_t* wide;
	char* utf8;

	// Stage 1: ANSI -> wide (measure, allocate, convert).
	*aLen = MultiByteToWideChar(CP_ACP, 0, pszGbs, -1, NULL, 0);
	wide = (wchar_t*)malloc(sizeof(wchar_t) * *aLen);
	MultiByteToWideChar(CP_ACP, 0, pszGbs, -1, wide, *aLen);

	// Stage 2: wide -> UTF-8 (measure, allocate, convert).
	*aLen = WideCharToMultiByte(CP_UTF8, 0, (PWSTR)wide, -1, NULL, 0, NULL, NULL);
	utf8 = (char*)malloc(sizeof(char) * *aLen);
	WideCharToMultiByte(CP_UTF8, 0, (PWSTR)wide, -1, utf8, *aLen, NULL, NULL);

	free(wide);
	return utf8;
}
// UTF-8 -> ANSI code page text (CP_ACP; GB2312 per the original note), going
// through an intermediate wide string.
// *aLen receives the number of output bytes including the terminator.
// The caller must free() the result.
char* U82GB(const char* szU8, uint* aLen) {
	wchar_t* wide;
	char* ansi;

	// Stage 1: UTF-8 -> wide (measure, allocate, convert).
	*aLen = MultiByteToWideChar(CP_UTF8, 0, szU8, -1, NULL, 0);
	wide = (wchar_t*)malloc(sizeof(wchar_t) * *aLen);
	MultiByteToWideChar(CP_UTF8, 0, szU8, -1, wide, *aLen);

	// Stage 2: wide -> ANSI (measure, allocate, convert).
	*aLen = WideCharToMultiByte(CP_ACP, 0, wide, -1, NULL, 0, NULL, NULL);
	ansi = (char*)malloc(sizeof(char) * *aLen);
	WideCharToMultiByte(CP_ACP, 0, wide, -1, ansi, *aLen, NULL, NULL);

	free(wide);
	return ansi;
}
#else
/*
 * Convert inbuf from one character set to another with iconv.
 *
 * from_charset / to_charset  iconv charset names
 * inbuf, inlen               input bytes and their count
 * outbuf                     output buffer; its first *outlen bytes are zeroed
 *                            before converting
 * outlen                     in:  capacity of outbuf in bytes
 *                            out: number of bytes written to outbuf
 *
 * Returns 0 on success and (size_t)-1 when the conversion cannot be set up or
 * fails part-way. After a part-way failure *outlen still counts the bytes
 * converted so far; when the charset pair is not supported it is 0.
 */
size_t code_convert(const char *from_charset, const char *to_charset, const char *inbuf, size_t inlen, char *outbuf, size_t* outlen) {
	size_t rc = 0;
	size_t left = *outlen;		// unused bytes remaining in outbuf
	char *in = (char *)inbuf;	// iconv wants char**, it does not write to the input
	char *out = outbuf;
	iconv_t cd;

	memset(outbuf, 0, left);

	// iconv_open signals failure with (iconv_t)-1, not with a null descriptor.
	cd = iconv_open(to_charset, from_charset);
	if (cd == (iconv_t)-1) {
		*outlen = 0;
		return (size_t)-1;
	}

	if (iconv(cd, &in, &inlen, &out, &left) == (size_t)-1)
		rc = (size_t)-1;
	iconv_close(cd);

	*outlen -= left;	// report the number of bytes used
	return rc;
}
/*
 * GB2312 -> iconv "unicode". The caller must free() the result.
 *
 * aLen  in:  number of input bytes
 *       out: number of bytes produced + 1 (the result is followed by a zero byte)
 *
 * Returns NULL (and *aLen = 0) when memory cannot be allocated. A failed or
 * partial conversion is not reported: the buffer then holds whatever was
 * converted before the failure.
 */
char* GB2U(const char* pszGbs, uint* aLen) {
	size_t len = (size_t)*aLen * 4;
	// calloc with one extra byte: the byte behind the output is always zero.
	char *outbuf = (char*)calloc(1, len + 1);
	if (outbuf == NULL) {
		*aLen = 0;
		return NULL;
	}
	code_convert("gb2312", "unicode", pszGbs, *aLen, outbuf, &len);
	*aLen = (uint)(len + 1);
	return outbuf;
}
/*
 * iconv "unicode" -> UTF-8. The caller must free() the result.
 *
 * aLen  in:  number of input bytes
 *       out: number of bytes produced + 1 (the result is followed by a zero byte)
 *
 * The output buffer is twice the input size: a 2-byte unit can turn into
 * 3 UTF-8 bytes, so a buffer of the input size would cut such text short.
 *
 * Returns NULL (and *aLen = 0) when memory cannot be allocated. A failed or
 * partial conversion is not reported: the buffer then holds whatever was
 * converted before the failure.
 */
char* U2U8(const char* wszUnicode, uint* aLen) {
	size_t len = (size_t)*aLen * 2;
	// calloc with one extra byte: the byte behind the output is always zero.
	char *outbuf = (char*)calloc(1, len + 1);
	if (outbuf == NULL) {
		*aLen = 0;
		return NULL;
	}
	code_convert("unicode", "utf-8", wszUnicode, *aLen, outbuf, &len);
	*aLen = (uint)(len + 1);
	return outbuf;
}
/*
 * UTF-8 -> iconv "unicode". The caller must free() the result.
 *
 * aLen  in:  number of input bytes
 *       out: number of bytes produced + 1 (the result is followed by a zero byte)
 *
 * The output buffer is twice the input size plus 4 bytes of slack, so that
 * pure ASCII input (1 byte -> 2 bytes) still fits when the converter emits a
 * leading byte-order mark. NOTE(review): whether "unicode" emits a BOM depends
 * on the iconv implementation.
 *
 * Returns NULL (and *aLen = 0) when memory cannot be allocated. A failed or
 * partial conversion is not reported: the buffer then holds whatever was
 * converted before the failure.
 */
char* U82U(const char* szU8, uint* aLen) {
	size_t len = (size_t)*aLen * 2 + 4;
	// calloc with one extra byte: the byte behind the output is always zero.
	char *outbuf = (char*)calloc(1, len + 1);
	if (outbuf == NULL) {
		*aLen = 0;
		return NULL;
	}
	code_convert("utf-8", "unicode", szU8, *aLen, outbuf, &len);
	*aLen = (uint)(len + 1);
	return outbuf;
}
/*
 * iconv "unicode" -> GB2312. The caller must free() the result.
 *
 * aLen  in:  number of input bytes
 *       out: number of bytes produced + 1 (the result is followed by a zero byte)
 *
 * Returns NULL (and *aLen = 0) when memory cannot be allocated. A failed or
 * partial conversion is not reported: the buffer then holds whatever was
 * converted before the failure.
 */
char* U2GB(const char* wszUnicode, uint* aLen) {
	size_t len = *aLen;
	// calloc with one extra byte: the byte behind the output is always zero.
	char *outbuf = (char*)calloc(1, len + 1);
	if (outbuf == NULL) {
		*aLen = 0;
		return NULL;
	}
	code_convert("unicode", "gb2312", wszUnicode, *aLen, outbuf, &len);
	*aLen = (uint)(len + 1);
	return outbuf;
}
/*
 * GB2312 -> UTF-8. The caller must free() the result.
 *
 * aLen  in:  number of input bytes
 *       out: number of bytes produced + 1 (the result is followed by a zero byte)
 *
 * Returns NULL (and *aLen = 0) when memory cannot be allocated. A failed or
 * partial conversion is not reported: the buffer then holds whatever was
 * converted before the failure.
 */
char* GB2U8(const char* pszGbs, uint* aLen) {
	size_t len = (size_t)*aLen * 3;
	// calloc with one extra byte: the byte behind the output is always zero.
	char *outbuf = (char*)calloc(1, len + 1);
	if (outbuf == NULL) {
		*aLen = 0;
		return NULL;
	}
	code_convert("gb2312", "utf-8", pszGbs, *aLen, outbuf, &len);
	*aLen = (uint)(len + 1);
	return outbuf;
}
/*
 * UTF-8 -> GB2312. The caller must free() the result.
 *
 * aLen  in:  number of input bytes
 *       out: number of bytes produced + 1 (the result is followed by a zero byte)
 *
 * Returns NULL (and *aLen = 0) when memory cannot be allocated. A failed or
 * partial conversion is not reported: the buffer then holds whatever was
 * converted before the failure.
 */
char* U82GB(const char* szU8, uint* aLen) {
	size_t len = *aLen;
	// calloc with one extra byte: the byte behind the output is always zero.
	char *outbuf = (char*)calloc(1, len + 1);
	if (outbuf == NULL) {
		*aLen = 0;
		return NULL;
	}
	code_convert("utf-8", "gb2312", szU8, *aLen, outbuf, &len);
	*aLen = (uint)(len + 1);
	return outbuf;
}
#endif
/***************************************************************************
 * UTF8ToUCS2 - convert a UTF-8 byte stream to 16-bit UCS-2 units.
 * Original author: lianxiuzhu (2008-05-22)
 *
 * Parameters:
 *   binUTF8  UTF-8 input bytes
 *   uCount   number of bytes in binUTF8
 *   binUCS2  output array; needs room for one unit per input character
 *            (at most uCount units). It does not have to be zeroed beforehand.
 *
 * Returns:
 *   the number of 16-bit units written, or 0 when a lead byte above 0xEF is
 *   met (sequences longer than 3 bytes do not fit into UCS-2) or when the last
 *   sequence is cut off by the end of the input.
 *
 * Lead bytes are classified by upper bound only (<= 0x7F: 1 byte,
 * <= 0xDF: 2 bytes, <= 0xEF: 3 bytes); continuation bytes are not validated.
 ***************************************************************************/
size_t UTF8ToUCS2(const uchar* binUTF8, size_t uCount, ushort* binUCS2) {
	size_t uLength = 0;
	size_t pos = 0;

	while (pos < uCount) {
		uchar lead = binUTF8[pos];
		size_t need;
		size_t k;
		ushort unit;

		if (lead <= 0x7F) {			//0xxxxxxx
			need = 1;
			unit = (ushort)(lead & 0x7F);
		}
		else if (lead <= 0xDF) {	//110xxxxx 10xxxxxx
			need = 2;
			unit = (ushort)(lead & 0x1F);
		}
		else if (lead <= 0xEF) {	//1110xxxx 10xxxxxx 10xxxxxx
			need = 3;
			unit = (ushort)(lead & 0x0F);
		}
		else {
			return 0;
		}

		// Never read continuation bytes that lie beyond the input.
		if (need > uCount - pos)
			return 0;

		for (k = 1; k < need; k++)
			unit = (ushort)((unit << 6) | (binUTF8[pos + k] & 0x3F));

		// Plain store: the result must not depend on what the caller left
		// in the output array.
		binUCS2[uLength++] = unit;
		pos += need;
	}
	return uLength;
}
#pragma endregion
//-----------------------------------------------------------------------------------url编码解码 win/linux
#pragma region url编码解码
/*
 * Percent-encode a URL string. The caller must free() the result.
 *
 * url  NUL-terminated input; NULL yields NULL
 * len  in:  maximum number of input bytes to encode (encoding also stops at
 *           the terminating NUL). May be NULL to encode the whole string.
 *      out: length of the encoded result in bytes
 *
 * Bytes below ' ' or above '~' and the characters in urlunsafe are written as
 * %XX (upper-case hex); everything else is copied unchanged.
 */
char* url_encode(const char *url, uint* len) {
	membuf_t buf;
	const char *p;
	const char urlunsafe[] = "\r\n \"#%&+:;<=>?@[\\]^`{|}";
	const char hex[] = "0123456789ABCDEF";
	char enc[3] = { '%',0,0 };
	size_t limit;

	if (!url)
		return NULL;

	// Read the limit through the pointer; the pointer itself must stay put
	// because the result length is stored through it below.
	limit = len ? (size_t)*len : (size_t)-1;

	membuf_init(&buf, strlen(url) + 1);
	for (p = url; *p && (size_t)(p - url) < limit; p++) {
		// Work on the unsigned value: bytes >= 0x80 would otherwise be
		// negative and index in front of hex[].
		uchar c = (uchar)*p;
		if (c < ' ' || c > '~' || strchr(urlunsafe, c)) {
			enc[1] = hex[c >> 4];
			enc[2] = hex[c & 0x0f];
			membuf_append_data(&buf, enc, 3);
		}
		else {
			membuf_append_data(&buf, p, 1);
		}
	}
	membuf_trunc(&buf);
	if (len)
		*len = buf.size;
	return (char*)buf.data;
}
// Value of one hexadecimal digit, or -1 when c is not a hex digit.
static int url_hex_value(char c) {
	if (c >= '0' && c <= '9') return c - '0';
	if (c >= 'a' && c <= 'f') return c - 'a' + 10;
	if (c >= 'A' && c <= 'F') return c - 'A' + 10;
	return -1;
}
/*
 * Decode %XX escapes in place and return url (NULL stays NULL).
 *
 * Only a '%' followed by exactly two hex digits is an escape; anything else,
 * including a '%' near the end of the string, is copied unchanged. '+' is not
 * turned into a space.
 */
char* url_decode(char *url) {
	char *o, *s;
	if (url == NULL)
		return NULL;
	for (o = s = url; *s; s++, o++) {
		int hi, lo;
		// The checks short-circuit, so s[2] is only read when s[1] is a
		// hex digit and therefore not the terminator.
		if (*s == '%' && (hi = url_hex_value(s[1])) >= 0 && (lo = url_hex_value(s[2])) >= 0) {
			*o = (char)((hi << 4) | lo);
			s += 2;
		}
		else {
			*o = *s;
		}
	}
	*o = '\0';
	return url;
}
#pragma endregion
//-----------------------------------------------------------------------------------Base64编码解码 win/linux
#pragma region Base64编码解码
// The 64 characters of the Base64 alphabet, indexed by 6-bit value.
char base64_table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
// Padding character that fills the last output group.
char base64_end = '=';
// Non-zero when c belongs to the Base64 alphabet. The argument is converted to
// unsigned char before it reaches isalnum, which is not defined for negative
// values, and is parenthesised so that any expression can be passed.
#define is_base64(c) (isalnum((unsigned char)(c)) || ((c) == '+') || ((c) == '/'))
//Base64编码,需要释放返回值(need free return)
char* base64_Encode(const uchar* bytes_to_encode, uint in_len) {
membuf_t ret;
int i = 0, j = 0;
uchar char_array_3[3];
uchar char_array_4[4];
membuf_init(&ret, in_len * 3);//初始化缓存字节数为 长度的3被
while (in_len--) {
char_array_3[i++] = *(bytes_to_encode++);
if (i == 3) {
char_array_4[0] = (char_array_3[0] & 0xfc) >> 2;
char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4);
char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6);
char_array_4[3] = char_array_3[2] & 0x3f;
for (i = 0; (i < 4); i++)
membuf_append_data(&ret, &base64_table[char_array_4[i]], 1);
i = 0;
}
}
if (i) {
for (j = i; j < 3; j++)
char_array_3[j] = '\0';
char_array_4[0] = (char_array_3[0] & 0xfc) >> 2;
char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4);
char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6);
char_array_4[3] = char_array_3[2] & 0x3f;
for (j = 0; (j < i + 1); j++)
membuf_append_data(&ret, &base64_table[char_array_4[j]], 1);
while ((i++ < 3))
membuf_append_data(&ret, &base64_end, 1);
}
return (char*)ret.data;
}
//Base64解码,需要释放返回值(need free return)
char* base64_Decode(const char* encoded_string) {
size_t in_len = strlen(encoded_string);
int i = 0;
int j = 0;
size_t in_ = 0;
uchar char_array_4[4], char_array_3[3];
membuf_t ret;
membuf_init(&ret, strlen(encoded_string) / 3 + 1);
while (in_len-- && (encoded_string[in_] != '=') && is_base64(encoded_string[in_])) {
char_array_4[i++] = encoded_string[in_]; in_++;
if (i == 4) {
for (i = 0; i < 4; i++)
//char_array_4[i] = strstr(base64_table,(char*)&char_array_4[i])[0];
char_array_4[i] = strchr(base64_table, char_array_4[i]) - base64_table;
char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
for (i = 0; (i < 3); i++)
membuf_append_data(&ret, &char_array_3[i], 1);
i = 0;
}
}
if (i) {
for (j = i; j < 4; j++)
char_array_4[j] = 0;
for (j = 0; j < 4; j++)
//char_array_4[j] = strstr(base64_table, (char*)&char_array_4[j])[0];
char_array_4[j] = strchr(base64_table, char_array_4[j]) - base64_table;
char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
for (j = 0; (j < i - 1); j++)
membuf_append_data(&ret, &char_array_3[j], 1);
}
return (char*)ret.data;
}
#pragma endregion
//-----------------------------------------------------------------------------------MD5计算摘要 win/unix
#pragma region MD5计算摘要
/*
* The basic MD5 functions.
*
* F and G are optimized compared to their RFC 1321 definitions for
* architectures that lack an AND-NOT instruction, just like in Colin Plumb's
* implementation.
*/
// Round functions: each combines three words with bitwise operations only.
// MD5_H and MD5_H2 are the same XOR of three words with different grouping.
#define MD5_F(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
#define MD5_G(x, y, z) ((y) ^ ((z) & ((x) ^ (y))))
#define MD5_H(x, y, z) (((x) ^ (y)) ^ (z))
#define MD5_H2(x, y, z) ((x) ^ ((y) ^ (z)))
#define MD5_I(x, y, z) ((y) ^ ((x) | ~(z)))
/*
 * The MD5 transformation for all four rounds.
 *
 * One step: add f(b, c, d), the message word x and the constant t to a,
 * rotate a left by s bits within 32 bits, then add b.
 * The macro expands to three statements without a do { } while (0) wrapper,
 * so it must not be used as the unbraced body of an if/else/loop.
 */
#define MD5_STEP(f, a, b, c, d, x, t, s) \
(a) += f((b), (c), (d)) + (x) + (t); \
(a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s)))); \
(a) += (b)
/*
 * SET reads 4 input bytes in little-endian byte order and stores them in a
 * properly aligned word in host byte order.
 *
 * The check for little-endian architectures that tolerate unaligned memory
 * accesses is just an optimization. Nothing will break if it fails to detect
 * a suitable architecture.
 *
 * Unfortunately, this optimization may be a C strict aliasing rules violation
 * if the caller's data buffer has effective type that cannot be aliased by
 * uint. In practice, this problem may occur if these MD5 routines are
 * inlined into a calling function, or with future and dangerously advanced
 * link-time optimizations. For the time being, keeping these MD5 routines in
 * their own translation unit avoids the problem.
 *
 * Both variants expect a byte pointer named ptr and a context pointer named
 * ctx (with a block[] array) to be in scope where the macros are expanded.
 *
 * NOTE(review): in the x86/VAX branch MD5_SET only reads the word from ptr and
 * does not store it into ctx->block, while MD5_GET reads ctx->block. Confirm
 * that the code using these macros fills ctx->block on that path; otherwise
 * MD5_GET returns words that were never written there.
 */
#if defined(__i386__) || defined(__x86_64__) || defined(__vax__)
#define MD5_SET(n) \
(*(uint *)&ptr[(n) * 4])
#define MD5_GET(n) \
(ctx->block[(n)])/*SET(n)*/
#else
#define MD5_SET(n) \
(ctx->block[(n)] = \
(uint)ptr[(n) * 4] | \
((uint)ptr[(n) * 4 + 1] << 8) | \
((uint)ptr[(n) * 4 + 2] << 16) | \
((uint)ptr[(n) * 4 + 3] << 24))
#define MD5_GET(n) \
(ctx->block[(n)])
#endif
// Store the 32-bit value src into dst[0..3], least significant byte first.
// Expands to four statements without a do { } while (0) wrapper; src is
// evaluated four times.
#define MD5_OUT(dst, src) \
(dst)[0] = (uchar)(src); \
(dst)[1] = (uchar)((src) >> 8); \
(dst)[2] = (uchar)((src) >> 16); \
(dst)[3] = (uchar)((src) >> 24)
// This processes one or more 64-byte data blocks, but does NOT update the bit counters. There are no alignment requirements.