-
Notifications
You must be signed in to change notification settings - Fork 2
/
stringslice.h
177 lines (141 loc) · 6.25 KB
/
stringslice.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
#ifndef AEM_STRINGSLICE_H
#define AEM_STRINGSLICE_H
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#ifdef __unix__
# include <unistd.h>
#endif
#include <aem/aem.h>
// A view of a contiguous run of bytes, described by two pointers.
// The helpers in this header treat it as the half-open range [start, end):
// the length is `end - start`, and the slice is empty when the two pointers
// are equal. None of the helpers in this header allocate or free the bytes
// that the pointers refer to.
struct aem_stringslice {
// First byte of the slice
const char *start;
// One past the last byte of the slice
const char *end;
};
// Build a stringslice value (as a compound literal) whose `start` is `p` and
// whose `end` is `pe`.
#define aem_stringslice_new(p, pe) ((struct aem_stringslice){.start = (p), .end = (pe)})
// The empty stringslice: both `start` and `end` are NULL.
#define AEM_STRINGSLICE_EMPTY aem_stringslice_new(NULL, NULL)
static inline struct aem_stringslice aem_stringslice_new_len(const char *p, size_t n)
{
return aem_stringslice_new(p, &p[n]);
}
// Build a stringslice over a NUL-terminated C string; the terminator itself
// is not part of the slice. A NULL `p` yields AEM_STRINGSLICE_EMPTY.
// The optimizer should be smart enough to elide the call to strlen if you pass
// in a string literal.
static inline struct aem_stringslice aem_stringslice_new_cstr(const char *p)
{
	return p ? aem_stringslice_new_len(p, strlen(p)) : AEM_STRINGSLICE_EMPTY;
}
// Build a stringslice over a char array, leaving out its final byte (the NUL
// terminator when the array was initialized from a string literal).
// `char_arr` must be an actual array, not a pointer, because the length is
// taken from sizeof.
#define aem_stringslice_new_sizeof(char_arr) aem_stringslice_new_len((char_arr), sizeof(char_arr)-1)
// Output helpers; both are implemented outside this header.
// NOTE(review): presumably these write the bytes of `slice` to the stdio
// stream `fp` / the file descriptor `fd`; the meaning of the return values is
// not visible from here - confirm against the implementation.
int aem_stringslice_file_write(struct aem_stringslice slice, FILE *fp);
#ifdef __unix__
// Only declared when __unix__ is defined.
ssize_t aem_stringslice_fd_write(struct aem_stringslice slice, int fd);
#endif
// Return non-zero if the stringslice is non-empty, i.e. its start and end
// pointers differ.
static inline int aem_stringslice_ok(struct aem_stringslice slice)
{
	const int nonempty = (slice.end != slice.start);

	return nonempty;
}
// Return the number of bytes between the stringslice's start and end pointers
static inline size_t aem_stringslice_len(const struct aem_stringslice slice)
{
	const char *const first = slice.start;
	const char *const past_last = slice.end;

	return past_last - first;
}
// Pop the first byte off the stringslice and return it as a value in 0..255.
// Returns -1, leaving the slice untouched, if `slice` is NULL or empty.
static inline int aem_stringslice_getc(struct aem_stringslice *slice)
{
	if (!slice)
		return -1;
	if (slice->start == slice->end)
		return -1;

	// Go through unsigned char so bytes >= 0x80 never come out negative.
	const unsigned char byte = (unsigned char)*slice->start;
	slice->start++;
	return byte;
}
// Get a UTF-8 rune from the front of the stringslice.
// Returns zero on an invalid sequence or EOF.
// NOTE(review): presumably returns non-zero on success, stores the decoded
// rune through `out_p`, and advances `slice` past it - confirm in utf8.c.
// Implementation in utf8.c
int aem_stringslice_get_rune(struct aem_stringslice *slice, uint32_t *out_p);
// The same, but returns a stringslice of the bytes of the rune's encoding, or
// empty on failure.
static inline struct aem_stringslice aem_stringslice_match_rune(struct aem_stringslice *slice, uint32_t *out_p)
{
if (!slice)
return AEM_STRINGSLICE_EMPTY;
struct aem_stringslice out = *slice;
int ok = aem_stringslice_get_rune(slice, out_p);
if (!ok)
*slice = out;
out.end = slice->start;
return out;
}
// Get a UTF-8 rune.
// Deprecated because the return value is ambiguous: it returns -1 both for
// the rune 0xFFFFFFFF and on error, so you must manually check whether
// slice->start moved to find out which.
// Use aem_stringslice_get_rune instead.
aem_deprecated_msg("use aem_stringslice_get_rune instead")
int aem_stringslice_get(struct aem_stringslice *slice);
// Get raw data
// Copies the next `count` bytes of `slice` into `buf` and advances the slice
// past them.
// Returns 0 on success. Returns -1, leaving both `slice` and `buf` untouched,
// if `slice` is NULL, if fewer than `count` bytes are available, or if `buf`
// is NULL while `count` is non-zero.
// A zero-byte read on a non-NULL slice always succeeds and touches nothing.
static inline int aem_stringslice_read_data(struct aem_stringslice *slice, void *buf, size_t count)
{
	if (!slice)
		return -1;
	if (aem_stringslice_len(*slice) < count)
		return -1;

	// Nothing to copy. Returning here also keeps memcpy and the pointer
	// bump away from the NULL pointers of an empty slice
	// (AEM_STRINGSLICE_EMPTY), which would be undefined even for a length
	// of zero.
	if (!count)
		return 0;
	if (!buf)
		return -1;

	memcpy(buf, slice->start, count);
	slice->start += count;
	return 0;
}
// TODO #ifdef AEM_CONFIG_HAVE_STMT_EXPR
// Read one object of type `T` from the front of `slice` (a
// struct aem_stringslice *) and evaluate to it. Requires GNU statement
// expressions.
// The bytes are copied by aem_stringslice_read_data, so the value is taken
// verbatim from the slice with no byte-order conversion.
// If fewer than sizeof(T) bytes are available, the slice is left unchanged
// and the macro evaluates to a `T` whose bytes are all zero. The status of
// aem_stringslice_read_data is not exposed, so callers that need to detect a
// short read should check the length first or call aem_stringslice_read_data
// directly.
// The temporary has a deliberately unusual name so that it cannot capture an
// identifier appearing in the `slice` argument.
#define AEM_STRINGSLICE_RD_TYPE(slice, T) __extension__({ \
	T aem_rd_type_out_; \
	memset(&aem_rd_type_out_, 0, sizeof(aem_rd_type_out_)); \
	aem_stringslice_read_data((slice), &aem_rd_type_out_, sizeof(aem_rd_type_out_)); \
	aem_rd_type_out_; \
})
// The functions below are implemented outside this header.
// NOTE(review): going by its name, aem_stringslice_match_ws presumably
// consumes whitespace at the current position - confirm the return convention
// in the implementation.
int aem_stringslice_match_ws(struct aem_stringslice *slice);
// NOTE(review): presumably returns `slice` with surrounding whitespace
// removed; takes and returns the slice by value - confirm.
struct aem_stringslice aem_stringslice_trim(struct aem_stringslice slice);
// Consume a CR, CRLF, or LF at the current position.
// Returns 1 for LF, 2 for CR, 3 for CRLF, or 0 on failure
int aem_stringslice_match_newline(struct aem_stringslice *slice);
// NOTE(review): presumably each consumes a run of alphanumeric / word
// characters from the front of `slice` and returns the consumed bytes as a
// stringslice - the exact character classes are not visible here; confirm.
struct aem_stringslice aem_stringslice_match_alnum(struct aem_stringslice *slice);
struct aem_stringslice aem_stringslice_match_word(struct aem_stringslice *slice);
// Match a line, even if it's missing its line terminator.
// NOTE(review): whether the returned stringslice includes the terminator is
// not visible from this header - confirm against the implementation.
struct aem_stringslice aem_stringslice_match_line(struct aem_stringslice *slice);
// Match a line, but fail if it has no line terminator unless `finish` is
// non-zero.
//
// This function is intended to be used when the input stringslice might not
// contain all data yet - it can be called again whenever more data becomes
// available, and it will only return a line and modify the input stringslice
// when a complete line is available (unless `finish` is set).
//
// The `state` input should be initialized to zero and preserved across calls.
// It is currently only used to ensure CRLF line terminators are treated as
// only a single newline even when they're split across multiple partial
// buffers fed to consecutive calls to this function.
struct aem_stringslice aem_stringslice_match_line_multi(struct aem_stringslice *slice, int *state, int finish);
// Match the stringslice `s` at the front (prefix) or back (suffix) of
// `slice`. Implemented outside this header.
// NOTE(review): presumably, on a match, the matched bytes are removed from
// `slice` and a non-zero value is returned; otherwise `slice` is left alone
// and 0 is returned - confirm against the implementation.
int aem_stringslice_match_prefix(struct aem_stringslice *slice, struct aem_stringslice s);
int aem_stringslice_match_suffix(struct aem_stringslice *slice, struct aem_stringslice s);
static inline int aem_stringslice_match(struct aem_stringslice *slice, const char *s)
{
return aem_stringslice_match_prefix(slice, aem_stringslice_new_cstr(s));
}
static inline int aem_stringslice_match_end(struct aem_stringslice *slice, const char *s)
{
return aem_stringslice_match_suffix(slice, aem_stringslice_new_cstr(s));
}
// Match the three-byte sequence EF BB BF (the UTF-8 encoding of the byte
// order mark) at the front of `slice`, via aem_stringslice_match.
static inline int aem_stringslice_match_bom(struct aem_stringslice *slice)
{
	static const char utf8_bom[] = "\xEF\xBB\xBF";

	return aem_stringslice_match(slice, utf8_bom);
}
// Test whether a stringslice exactly matches the given C-string
// NOTE(review): presumably returns non-zero on a match and 0 otherwise -
// confirm against the implementation.
int aem_stringslice_eq(struct aem_stringslice slice, const char *s);
// Case-insensitive version of the above
int aem_stringslice_eq_case(struct aem_stringslice slice, const char *s);
// Compare two stringslices.
// NOTE(review): presumably a three-way comparison in the style of strcmp/
// memcmp (negative, zero, positive) - the ordering rules are not visible
// from this header; confirm.
int aem_stringslice_cmp(struct aem_stringslice s0, struct aem_stringslice s1);
// Numeric parsers; all are implemented outside this header.
// NOTE(review): presumably parses two hex digits and returns the byte value,
// with a negative value on failure - confirm.
int aem_stringslice_match_hexbyte(struct aem_stringslice *slice);
// Parse an integer written in the given `base` from the front of `slice` and
// store it through `out`.
// NOTE(review): aem_stringslice_match_int negates the result of
// aem_stringslice_match_int_base to get "0 on success", so these appear to
// return non-zero on success and 0 on failure - confirm, along with which
// `base` values are accepted and how overflow is handled.
int aem_stringslice_match_ulong_base(struct aem_stringslice *slice, int base, unsigned long int *out);
int aem_stringslice_match_long_base(struct aem_stringslice *slice, int base, long int *out);
int aem_stringslice_match_uint_base(struct aem_stringslice *slice, int base, unsigned int *out);
int aem_stringslice_match_int_base(struct aem_stringslice *slice, int base, int *out);
// NOTE(review): takes no `base`, so presumably detects it from the input
// (e.g. a prefix) - confirm.
int aem_stringslice_match_long_auto(struct aem_stringslice *slice, long int *out);
// Deprecated wrapper around aem_stringslice_match_int_base that keeps the old
// return convention: 0 on success, -1 on failure.
// TODO: inconsistency: this function returns -1 on failure and 0 on success,
// while most other functions in this file that only use their return value to
// indicate status return 0 on failure and 1 on success.
aem_deprecated_msg("use aem_stringslice_match_int_base instead")
static inline int aem_stringslice_match_int(struct aem_stringslice *slice, int base, int *out)
{
	// Map the status explicitly: a plain logical negation would report
	// failure as 1 rather than the documented -1.
	return aem_stringslice_match_int_base(slice, base, out) ? 0 : -1;
}
#endif /* AEM_STRINGSLICE_H */