Skip to content

Commit

Permalink
Fix gnutar creation with unicode hardlink names on Windows (libarchiv…
Browse files Browse the repository at this point in the history
…e#2227)

The code currently uses `archive_entry_hardlink` to determine whether an
entry is a hardlink. On Windows, however, this call fails if the path
cannot be represented in the current locale. This change instead checks
whether a hardlink name has been set on the entry in any form of the
`archive_mstring`, without attempting to convert it.
  • Loading branch information
dunhor authored Jun 20, 2024
1 parent 56e0236 commit 07206cd
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 1 deletion.
6 changes: 6 additions & 0 deletions libarchive/archive_entry.c
Original file line number Diff line number Diff line change
Expand Up @@ -526,6 +526,12 @@ archive_entry_hardlink_w(struct archive_entry *entry)
return (NULL);
}

/*
 * Test the AE_SET_HARDLINK bit in the entry's ae_set flags.
 *
 * Only the flag word is inspected; no hardlink string is fetched or
 * converted, so the result does not depend on the current locale.
 *
 * Returns 1 when the bit is set and 0 when it is clear.
 */
int
archive_entry_hardlink_is_set(struct archive_entry *entry)
{
	if (entry->ae_set & AE_SET_HARDLINK)
		return (1);
	return (0);
}

int
_archive_entry_hardlink_l(struct archive_entry *entry,
const char **p, size_t *len, struct archive_string_conv *sc)
Expand Down
1 change: 1 addition & 0 deletions libarchive/archive_entry.h
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,7 @@ __LA_DECL void archive_entry_set_link_to_hardlink(struct archive_entry *);
__LA_DECL const char *archive_entry_hardlink(struct archive_entry *);
__LA_DECL const char *archive_entry_hardlink_utf8(struct archive_entry *);
__LA_DECL const wchar_t *archive_entry_hardlink_w(struct archive_entry *);
/* Returns non-zero when the entry's AE_SET_HARDLINK flag is set; only the
 * flag is tested, the hardlink string itself is not fetched or converted. */
__LA_DECL int archive_entry_hardlink_is_set(struct archive_entry *);
__LA_DECL la_int64_t archive_entry_ino(struct archive_entry *);
__LA_DECL la_int64_t archive_entry_ino64(struct archive_entry *);
__LA_DECL int archive_entry_ino_is_set(struct archive_entry *);
Expand Down
2 changes: 1 addition & 1 deletion libarchive/archive_write_set_format_gnutar.c
Original file line number Diff line number Diff line change
Expand Up @@ -523,7 +523,7 @@ archive_write_gnutar_header(struct archive_write *a,
goto exit_write_header;
}

if (archive_entry_hardlink(entry) != NULL) {
if (archive_entry_hardlink_is_set(entry)) {
tartype = '1';
} else
switch (archive_entry_filetype(entry)) {
Expand Down
102 changes: 102 additions & 0 deletions libarchive/test/test_gnutar_filename_encoding.c
Original file line number Diff line number Diff line change
Expand Up @@ -389,3 +389,105 @@ DEFINE_TEST(test_gnutar_filename_encoding_CP932_UTF8)
assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);
}

/*
 * Verify that the gnutar writer emits UTF-8 header fields when names are
 * supplied through the wide-character ('_w') entry setters and the
 * 'hdrcharset' option is UTF-8. Four cases are written, each into a fresh
 * in-memory archive: a regular file, a directory, a symlink and a hardlink.
 * The test body is compiled only for native Windows builds; elsewhere it
 * is reported as skipped.
 */
DEFINE_TEST(test_gnutar_filename_encoding_UTF16_win)
{
#if !defined(_WIN32) || defined(__CYGWIN__)
skipping("This test is meant to verify unicode string handling"
" on Windows with UTF-16 names");
return;
#else
struct archive *a;
struct archive_entry *entry;
char buff[4096];
size_t used;

/*
* Don't call setlocale because we're verifying that the '_w' functions
* work as expected when 'hdrcharset' is UTF-8
*/

/* Part 1: file */
a = archive_write_new();
assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a));
/* If the option is rejected, skip the whole test rather than fail it. */
if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
skipping("This system cannot convert character-set"
" from UTF-16 to UTF-8.");
archive_write_free(a);
return;
}
assertEqualInt(ARCHIVE_OK,
archive_write_open_memory(a, buff, sizeof(buff), &used));

entry = archive_entry_new2(a);
/* Set the filename using a UTF-16 string */
archive_entry_copy_pathname_w(entry, L"\u8868.txt");
archive_entry_set_filetype(entry, AE_IFREG);
archive_entry_set_size(entry, 0);
assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
archive_entry_free(entry);
assertEqualInt(ARCHIVE_OK, archive_write_free(a));

/*
 * Check UTF-8 version: "\xE8\xA1\xA8" is the UTF-8 encoding of U+8868,
 * and the name is expected at the very start of the output buffer.
 */
assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);

/* Part 2: directory */
a = archive_write_new();
assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a));
assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8"));
assertEqualInt(ARCHIVE_OK,
archive_write_open_memory(a, buff, sizeof(buff), &used));

entry = archive_entry_new2(a);
/* Set the directory name using a UTF-16 string */
/* NOTE: Explicitly not adding trailing slash to test that code path */
archive_entry_copy_pathname_w(entry, L"\u8868");
archive_entry_set_filetype(entry, AE_IFDIR);
archive_entry_set_size(entry, 0);
assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
archive_entry_free(entry);
assertEqualInt(ARCHIVE_OK, archive_write_free(a));

/*
 * Check UTF-8 version: the written name is expected to carry a trailing
 * '/' even though none was supplied above.
 */
assertEqualMem(buff, "\xE8\xA1\xA8/", 4);

/* Part 3: symlink */
a = archive_write_new();
assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a));
assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8"));
assertEqualInt(ARCHIVE_OK,
archive_write_open_memory(a, buff, sizeof(buff), &used));

entry = archive_entry_new2(a);
/* Set the symlink target using a UTF-16 string */
archive_entry_set_pathname(entry, "link.txt");
archive_entry_copy_symlink_w(entry, L"\u8868.txt");
archive_entry_set_filetype(entry, AE_IFLNK);
archive_entry_set_symlink_type(entry, AE_SYMLINK_TYPE_FILE);
archive_entry_set_size(entry, 0);
assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
archive_entry_free(entry);
assertEqualInt(ARCHIVE_OK, archive_write_free(a));

/*
 * Check UTF-8 version of the link target.
 * NOTE(review): offset 157 is presumably the link-name field of the tar
 * header block -- confirm against the tar header layout.
 */
assertEqualMem(buff + 157, "\xE8\xA1\xA8.txt", 7);

/* Part 4: hardlink */
a = archive_write_new();
assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a));
assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8"));
assertEqualInt(ARCHIVE_OK,
archive_write_open_memory(a, buff, sizeof(buff), &used));

entry = archive_entry_new2(a);
/* Set the hardlink target using a UTF-16 string */
archive_entry_set_pathname(entry, "link.txt");
archive_entry_copy_hardlink_w(entry, L"\u8868.txt");
/* No filetype is set for this entry; only the hardlink name is given. */
archive_entry_set_size(entry, 0);
assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
archive_entry_free(entry);
assertEqualInt(ARCHIVE_OK, archive_write_free(a));

/*
 * Check UTF-8 version of the hardlink target, at the same offset as the
 * symlink target in Part 3.
 */
assertEqualMem(buff + 157, "\xE8\xA1\xA8.txt", 7);
#endif
}

0 comments on commit 07206cd

Please sign in to comment.