Skip to content

Commit

Permalink
Fix error handling with iconv (fix #38)
Browse files Browse the repository at this point in the history
When --strict is given, add //IGNORE to the encoding (remove manually set
“-ignore” encodings). Return an error when the input is untranslatable,
unless --force is also given.

Only modify the to-encoding in this way, not the from-encoding, as that
doesn’t do anything.
  • Loading branch information
rrthomas committed Feb 15, 2022
1 parent b9b686e commit 62b996d
Show file tree
Hide file tree
Showing 8 changed files with 76 additions and 45 deletions.
19 changes: 14 additions & 5 deletions doc/recode.texi
Original file line number Diff line number Diff line change
Expand Up @@ -1803,9 +1803,20 @@ external @code{iconv} library. This means that the charsets and aliases
provided by the @code{iconv} external library and not by Recode
itself are not available.

@item RECODE_STRICT_MAPPING_FLAG

When this flag is set (corresponding to the @samp{--strict} command-line
option), untranslatable characters are discarded, but an error is
returned on completion unless @code{RECODE_FORCE_FLAG} is also set.

@item RECODE_FORCE_FLAG

When this flag is set (corresponding to the @samp{--force} command-line
option), errors caused by untranslatable characters are ignored.

@end table

In previous incatations of the Recode library, @var{flags}
In previous incarnations of the Recode library, @var{flags}
was a Boolean instead of a collection of flags, meant to set
@code{RECODE_AUTO_ABORT_FLAG}. This still works, but is deprecated.

Expand Down Expand Up @@ -2945,10 +2956,8 @@ an external @code{iconv} library, as they likely share many charsets.
We discuss, here, the issues related to this duplication, and other
peculiarities specific to the @code{iconv} library.

If the string @code{-ignore} is appended to the @var{after} encoding,
characters that cannot be converted are discarded and an error is
printed after conversion. This corresponds to the @code{iconv} option
@code{//IGNORE}.
The @code{strict_mapping} request option is implemented by adding the
@code{iconv} option @code{//IGNORE} to the @var{after} encoding.

If the string @code{-translit} is appended to the @var{after} encoding,
characters being converted are transliterated when needed and possible.
Expand Down
69 changes: 36 additions & 33 deletions src/iconv.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* Conversion of files between different charsets and surfaces.
Copyright © 1999, 2000, 2001, 2008 Free Software Foundation, Inc.
Copyright © 1999-2022 Free Software Foundation, Inc.
Contributed by François Pinard <[email protected]>, 1999,
and Bruno Haible <[email protected]>, 2000.
Expand Down Expand Up @@ -28,6 +28,18 @@
| Use `iconv' to handle a double step. |
`--------------------------------------*/

/* Perform a single iconv() call on CONVERSION with the given input and
   output cursors.  If the call fails, record its errno in *SAVED_ERRNO,
   except when the failure is EILSEQ and OUTER->force is set, in which
   case the failure is not recorded.  *SAVED_ERRNO is left untouched when
   the call succeeds.  */
static void
do_iconv (RECODE_OUTER outer,
          iconv_t conversion,
          char **input, size_t *input_left,
          char **output, size_t *output_left,
          int *saved_errno)
{
  if (iconv (conversion, input, input_left, output, output_left)
      != (size_t) -1)
    return;

  /* Capture errno straight away, before anything else can disturb it.  */
  int failure = errno;

  /* An invalid-sequence error is not reported when `force' is set.  */
  if (failure == EILSEQ && outer->force)
    return;

  *saved_errno = failure;
}

#define BUFFER_SIZE 2048

static bool
Expand All @@ -48,15 +60,16 @@ wrapped_transform (iconv_t conversion, RECODE_SUBTASK subtask)
size_t input_left = 0;
size_t output_left = BUFFER_SIZE;
int saved_errno = 0;
size_t converted;

if (drain_first)
{
/* Drain all accumulated partial state and emit output
to return to the initial shift state. */
converted = iconv (conversion, NULL, NULL, &output, &output_left);
if (converted == (size_t) -1)
saved_errno = errno;
do_iconv (subtask->task->request->outer,
conversion,
NULL, NULL,
&output, &output_left,
&saved_errno);
}

if (saved_errno == 0)
Expand Down Expand Up @@ -84,11 +97,11 @@ wrapped_transform (iconv_t conversion, RECODE_SUBTASK subtask)
/* Convert accumulated input and add it to the output buffer. */
input = input_buffer;
input_left = cursor - input_buffer;
converted = iconv (conversion,
&input, &input_left,
&output, &output_left);
if (converted == (size_t) -1)
saved_errno = errno;
do_iconv (subtask->task->request->outer,
conversion,
&input, &input_left,
&output, &output_left,
&saved_errno);
}
}

Expand Down Expand Up @@ -166,30 +179,21 @@ ends_with (const char *s, size_t s_len, const char *suff, size_t suff_len)
}

static char *
iconv_fix_options (const char *charset)
iconv_fix_options (RECODE_OUTER outer, const char *charset)
{
size_t charset_len = strlen (charset);
bool ignore = false, translit = false;

do {
if (ends_with (charset, charset_len, "-translit", strlen ("-translit")))
{
translit = true;
charset_len -= strlen ("-translit");
}
else if (ends_with (charset, charset_len, "-ignore", strlen ("-ignore")))
{
ignore = true;
charset_len -= strlen ("-ignore");
}
else
break;
} while (true);
bool translit = false;

if (ends_with (charset, charset_len, "-translit", strlen ("-translit")))
{
translit = true;
charset_len -= strlen ("-translit");
}

char *result;
if (asprintf (&result, "%.*s%s%s", (int) charset_len, charset,
translit ? "//TRANSLIT" : "",
ignore ? "//IGNORE": "")
outer->strict_mapping ? "//IGNORE": "")
== -1)
return NULL;
return result;
Expand All @@ -198,24 +202,23 @@ iconv_fix_options (const char *charset)
bool
transform_with_iconv (RECODE_SUBTASK subtask)
{
RECODE_OUTER outer = subtask->task->request->outer;
RECODE_CONST_STEP step = subtask->step;
char *tocode = iconv_fix_options (step->after->iconv_name);
char *fromcode = iconv_fix_options (step->before->iconv_name);
char *tocode = iconv_fix_options (outer, step->after->iconv_name);
const char *fromcode = step->before->iconv_name;
iconv_t conversion = (iconv_t) -1;

if (tocode && fromcode)
if (tocode)
conversion = iconv_open (tocode, fromcode);
if (conversion == (iconv_t) -1)
{
recode_if_nogo (RECODE_SYSTEM_ERROR, subtask);
free (fromcode);
free (tocode);
SUBTASK_RETURN (subtask);
}

bool status = wrapped_transform (conversion, subtask);
iconv_close (conversion);
free (fromcode);
free (tocode);
return status;
}
Expand Down
12 changes: 9 additions & 3 deletions src/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -333,13 +333,14 @@ new_outer(unsigned flags)
{
/* Register all modules and build internal tables. */

RECODE_OUTER outer = recode_new_outer (flags | RECODE_AUTO_ABORT_FLAG);
RECODE_OUTER outer =
recode_new_outer (flags | RECODE_AUTO_ABORT_FLAG);
if (!outer)
abort ();

/* Set strict mapping. */
/* If using strict mapping, remove fallbacks. */

if (strict_mapping)
if (outer->strict_mapping)
for (RECODE_SINGLE single = outer->single_list;
single;
single = single->next)
Expand Down Expand Up @@ -494,6 +495,7 @@ main (int argc, char *const *argv)
case 'f':
task_option.fail_level = RECODE_SYSTEM_ERROR;
task_option.abort_level = RECODE_USER_ERROR;
force_flag = true;
break;

case 'g':
Expand Down Expand Up @@ -647,6 +649,10 @@ warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"),
if ((ignored_name && *ignored_name == ':')
|| request_option.make_header_flag)
flags |= RECODE_NO_ICONV_FLAG;
if (strict_mapping)
flags |= RECODE_STRICT_MAPPING_FLAG;
if (force_flag)
flags |= RECODE_FORCE_FLAG;
RECODE_OUTER outer = new_outer (flags);

/* Process charset listing options. */
Expand Down
2 changes: 2 additions & 0 deletions src/outer.c
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,8 @@ recode_new_outer (unsigned flags)

outer->auto_abort = (flags & RECODE_AUTO_ABORT_FLAG) != 0;
outer->use_iconv = (flags & RECODE_NO_ICONV_FLAG) == 0;
outer->strict_mapping = (flags & RECODE_STRICT_MAPPING_FLAG) != 0;
outer->force = (flags & RECODE_FORCE_FLAG) != 0;

if (!register_all_modules (outer) || !make_argmatch_arrays (outer))
{
Expand Down
4 changes: 3 additions & 1 deletion src/recode.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* Conversion of files between different charsets and surfaces.
Copyright © 1990, 93, 94, 96, 97, 98, 99, 00 Free Software Foundation, Inc.
Copyright © 1990-2022 Free Software Foundation, Inc.
Contributed by François Pinard <[email protected]>, 1988.
This library is free software; you can redistribute it and/or
Expand Down Expand Up @@ -56,6 +56,8 @@ extern "C" {

#define RECODE_AUTO_ABORT_FLAG 1
#define RECODE_NO_ICONV_FLAG 2
#define RECODE_STRICT_MAPPING_FLAG 4
#define RECODE_FORCE_FLAG 8

RECODE_OUTER recode_new_outer (unsigned);
bool recode_delete_outer (RECODE_OUTER);
Expand Down
7 changes: 7 additions & 0 deletions src/recodext.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,13 @@ struct recode_outer
/* If the external `iconv' library should be initialized and used. */
bool use_iconv;

/* If we should discard untranslatable input and return an error,
unless 'force' is set (see below). */
bool strict_mapping;

/* If we should ignore untranslatable input altogether. */
bool force;

/* charset.c */
/* --------- */

Expand Down
4 changes: 1 addition & 3 deletions tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -489,15 +489,13 @@ def digest(self):
def complete(self, french):
def write_charset(format, charset):
write(format % charset)
write(format % (charset + "-ignore"))
write(format % (charset + "-translit"))
write(format % (charset + "-translit-ignore"))
if not self.do_sources:
return
write = Output(self.SOURCES).write
count = 1
for charset, aliases in self.data:
versions = 4 # Normal, //IGNORE, //TRANSLIT, //TRANSLIT//IGNORE
versions = 2 # Normal, //TRANSLIT
count = count + (versions + len(aliases)) * versions
write('\n'
"/* This is derived from Bruno Haible's `libiconv' package. */"
Expand Down
4 changes: 4 additions & 0 deletions tests/Recode.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -414,6 +414,7 @@ cdef extern from "common.h":
enum:
RECODE_AUTO_ABORT_FLAG
RECODE_NO_ICONV_FLAG
RECODE_STRICT_MAPPING_FLAG

RECODE_OUTER recode_new_outer(unsigned)
bool recode_delete_outer(RECODE_OUTER)
Expand Down Expand Up @@ -514,6 +515,7 @@ BYTE_ORDER_MARK_SWAPPED = BYTE_ORDER_MARK_SWAPPED_

AUTO_ABORT_FLAG = RECODE_AUTO_ABORT_FLAG
NO_ICONV_FLAG = RECODE_NO_ICONV_FLAG
STRICT_MAPPING_FLAG = RECODE_STRICT_MAPPING_FLAG

## Recode library at OUTER level.

Expand All @@ -528,6 +530,8 @@ cdef class Outer:
flags = flags | RECODE_AUTO_ABORT_FLAG
if not iconv:
flags = flags | RECODE_NO_ICONV_FLAG
if strict:
flags = flags | RECODE_STRICT_MAPPING_FLAG
self.outer = recode_new_outer(flags)
if strict:
single = self.outer.single_list
Expand Down

0 comments on commit 62b996d

Please sign in to comment.