Skip to content

Commit

Permalink
Implemented Feature jessek#225 - Don't follow symlinks.
Browse files Browse the repository at this point in the history
The '-R' flag causes hashdeep to hash the output of readlink instead of
following the symlink.

Tested on Ubuntu 16.04 and macOS High Sierra.
Not supported on Windows.
  • Loading branch information
s-d-adams committed Feb 20, 2018
1 parent 8776134 commit d994b4f
Show file tree
Hide file tree
Showing 5 changed files with 88 additions and 17 deletions.
5 changes: 5 additions & 0 deletions man/hashdeep.1
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,11 @@ open(). Specifying \fB-Fm\fR will use memory-mapped I/O which will be
faster on some platforms, but which (currently) will not work with
files that produce I/O errors.

.TP
\fB-R\fR
Don't follow symlinks; instead, hash the output of readlink. (Not available on
Windows.)



.TP
Expand Down
29 changes: 24 additions & 5 deletions src/dig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,19 @@ file_types file_metadata_t::decode_file_type(const struct __stat64 &sb)
*/
int file_metadata_t::stat(const tstring &fn,
file_metadata_t *m,
class display &ocb)
class display &ocb,
bool const is_symlink)
{
struct __stat64 sb;
if (::TSTAT(fn.c_str(),&sb))
if (ocb.opt_readlink && is_symlink)
{
if (::TLSTAT(fn.c_str(),&sb))
{
ocb.error_filename(fn,"%s",strerror(errno));
return -1;
}
}
else if (::TSTAT(fn.c_str(),&sb))
{
ocb.error_filename(fn,"%s",strerror(errno));
return -1;
Expand Down Expand Up @@ -564,6 +573,13 @@ void state::process_dir(const tstring &fn)
*/
bool state::should_hash_symlink(const tstring &fn, file_types *link_type)
{
/**
* When readlink option is set, all symlinks are to be hashed.
*/
if (ocb.opt_readlink) {
return true;
}

/**
* We must look at what this symlink points to before we process it.
* The file_type() function uses lstat to examine the file.
Expand Down Expand Up @@ -694,9 +710,11 @@ bool state::should_hash_expert(const tstring &fn, file_types type)
* but if it is called with a directory it recursively hashes it.
*/

bool state::should_hash(const tstring &fn)
bool state::should_hash(const tstring &fn, file_types &_type)
{
file_types type = state::file_type(fn,&ocb,0,0,0,0);

_type = type;

if (mode_expert)
return should_hash_expert(fn,type);
Expand Down Expand Up @@ -735,8 +753,9 @@ void state::dig_normal(const tstring &fn_) {
#endif
if (opt_debug)
ocb.status("*** cleaned:%s",global::make_utf8(fn).c_str());
if (should_hash(fn))
ocb.hash_file(fn);
file_types type;
if (should_hash(fn, type))
ocb.hash_file(fn, type);
}


Expand Down
50 changes: 43 additions & 7 deletions src/hash.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,16 @@ bool file_data_hasher_t::compute_hash(uint64_t request_start,uint64_t request_le
hc1->read_offset = request_start;
hc1->read_len = 0; // so far

unsigned char *readlink_buffer = 0;
if (ocb->opt_readlink && file_is_symlink) {
#ifndef _WIN32
if (request_len > file_data_hasher_t::MD5DEEP_IDEAL_BLOCK_SIZE) {
readlink_buffer = (unsigned char*)malloc(request_len);
readlink(file_name_to_hash.c_str(), (char*)readlink_buffer, request_len);
}
#endif
}

while (request_len>0){
// Clear the buffer in case we hit an error and need to pad the hash
// The use of MD5DEEP_IDEAL_BLOCK_SIZE means that we loop even for memory-mapped
Expand All @@ -107,10 +117,21 @@ bool file_data_hasher_t::compute_hash(uint64_t request_start,uint64_t request_le

ssize_t current_read_bytes = 0; // read the data into buffer

if(this->handle){
if (ocb->opt_readlink && file_is_symlink) {
#ifndef _WIN32
if (request_len > file_data_hasher_t::MD5DEEP_IDEAL_BLOCK_SIZE) {
memcpy(buffer_, readlink_buffer + hc1->read_len, toread);
}
else {
readlink(file_name_to_hash.c_str(), (char*)buffer_, toread);
}
current_read_bytes = toread;
#endif
}
else if(this->handle){
current_read_bytes = fread(buffer_, 1, toread, this->handle);
} else {
assert(this->fd!=0);
assert(this->fd!=-1);
if(this->base){
buffer = this->base + request_start;
current_read_bytes = min(toread,this->bounds - request_start); // can't read more than this
Expand Down Expand Up @@ -169,6 +190,12 @@ bool file_data_hasher_t::compute_hash(uint64_t request_start,uint64_t request_le
request_start += toread;
request_len -= toread;
}

if (readlink_buffer) {
free(readlink_buffer);
readlink_buffer = 0;
}

if (ocb->opt_estimate) ocb->clear_realtime_stats();
if (this->file_bytes == this->stat_bytes) this->eof = true; // end of the file
return true; // done hashing!
Expand Down Expand Up @@ -201,6 +228,8 @@ void file_data_hasher_t::hash()
{
file_data_hasher_t *fdht = this;

bool const readlink_this_file = ocb->opt_readlink && fdht->file_is_symlink;

/*
* If the handle is set, we are probably hashing stdin.
* If not, figure out file size and full file name for the handle
Expand All @@ -211,7 +240,7 @@ void file_data_hasher_t::hash()
//state::file_type(fdht->file_name_to_hash,ocb,&fdht->stat_bytes,
//&fdht->ctime,&fdht->mtime,&fdht->atime);
file_metadata_t m;
file_metadata_t::stat(fdht->file_name_to_hash,&m,*ocb);
file_metadata_t::stat(fdht->file_name_to_hash,&m,*ocb,fdht->file_is_symlink);
fdht->stat_bytes = m.size;
fdht->ctime = m.ctime;
fdht->mtime = m.mtime;
Expand All @@ -238,7 +267,11 @@ void file_data_hasher_t::hash()
}
}

switch(ocb->opt_iomode){
if (readlink_this_file) {
assert(fdht->fd == -1);
assert(fdht->handle == 0);
}
else switch(ocb->opt_iomode){
case iomode::buffered:
assert(fdht->handle==0);

Expand Down Expand Up @@ -360,7 +393,7 @@ void file_data_hasher_t::hash()
*/
fdht->file_bytes = 0;
if(fdht->handle) fseeko(fdht->handle, 0, SEEK_SET);
if(fdht->fd){
if(fdht->fd != -1){
lseek(this->fd,0,SEEK_SET);
}
fdht->eof = false; //
Expand All @@ -381,7 +414,7 @@ void file_data_hasher_t::hash()
while (fdht->eof==false) {

uint64_t request_len = fdht->stat_bytes; // by default, hash the file
if ( fdht->ocb->piecewise_size>0 ) {
if ( !readlink_this_file && fdht->ocb->piecewise_size>0 ) {
request_len = fdht->ocb->piecewise_size;
}

Expand Down Expand Up @@ -467,10 +500,13 @@ void worker::do_work(file_data_hasher_t *fdht)
* 2 - hash the fdht
* 3 - record it in stdout using display.
*/
void display::hash_file(const tstring &fn)
void display::hash_file(const tstring &fn, file_types const type)
{
file_data_hasher_t *fdht = new file_data_hasher_t(this);
fdht->file_name_to_hash = fn;
if (type == stat_symlink) {
fdht->file_is_symlink = true;
}

/**
* If we are using a thread pool, hash in another thread
Expand Down
9 changes: 8 additions & 1 deletion src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -614,7 +614,11 @@ int state::hashdeep_process_command_line(int argc_, char **argv_)
bool did_usage = false;
int i;

while ((i=getopt(argc_,argv_,"abc:CdeEF:f:o:I:i:MmXxtlk:rsp:wvVhW:0D:uj:")) != -1) {
while ((i=getopt(argc_,argv_,"abc:CdeEF:f:o:I:i:MmXxtlk:rsp:wvVhW:0D:uj:"
#ifndef _WIN32
"R"
#endif
)) != -1) {
switch (i)
{
case 'a':
Expand Down Expand Up @@ -683,6 +687,9 @@ int state::hashdeep_process_command_line(int argc_, char **argv_)
case 'b': ocb.mode_barename=true; break;
case 'l': ocb.opt_relative=true; break;
case 'e': ocb.opt_estimate = true; break;
#ifndef _WIN32
case 'R': ocb.opt_readlink = true; break;
#endif
case 'r': mode_recursive=true; break;
case 's': ocb.opt_silent = true; break;

Expand Down
12 changes: 8 additions & 4 deletions src/main.h
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ class file_metadata_t {
static file_types decode_file_type(const struct __stat64 &sb);

// stat a file, print an error and return -1 if it fails, otherwise return 0
static int stat(const filename_t &path,file_metadata_t *m,class display &ocb);
static int stat(const filename_t &path,file_metadata_t *m,class display &ocb, bool is_symlink = false);
class fileid_t { // uniquely defines a file on this system
public:
fileid_t():dev(0),ino(0){};
Expand Down Expand Up @@ -296,6 +296,7 @@ class file_data_hasher_t : public file_data_t {
}
static const size_t MD5DEEP_IDEAL_BLOCK_SIZE = 8192;
file_data_hasher_t(class display *ocb_):
file_is_symlink(false),
ocb(ocb_), // where we put results
handle(0),
fd(-1),
Expand All @@ -322,6 +323,7 @@ class file_data_hasher_t : public file_data_t {

/* The actual file to hash */
filename_t file_name_to_hash;
bool file_is_symlink;

/* Where the results go */
class display *ocb;
Expand Down Expand Up @@ -585,6 +587,7 @@ class display {
opt_display_hash(false),
opt_show_matched(false),
opt_case_sensitive(true),
opt_readlink(false),
opt_iomode(iomode::buffered), // by default, use buffered
#ifdef HAVE_PTHREAD
opt_threadcount(threadpool::numCPU()),
Expand Down Expand Up @@ -620,6 +623,7 @@ class display {
bool opt_display_hash;
bool opt_show_matched;
bool opt_case_sensitive;
bool opt_readlink;
int opt_iomode;
int opt_threadcount;

Expand Down Expand Up @@ -755,7 +759,7 @@ class display {
void finalize_matching();

/* hash.cpp: Actually trigger the hashing. */
void hash_file(const tstring &file_name);
void hash_file(const tstring &file_name, file_types type);
void hash_stdin();
void dump_hashlist(){ lock(); known.dump_hashlist(); unlock(); }
};
Expand Down Expand Up @@ -794,7 +798,7 @@ public:;

state():mode_recursive(false), // do we recurse?
mode_warn_only(false), // for loading hash files

// these determine which files get hashed
mode_expert(false),
mode_regular(false),
Expand Down Expand Up @@ -904,7 +908,7 @@ public:;
bool should_hash_symlink(const tstring &fn,file_types *link_type);
bool should_hash_winpe(const tstring &fn);
bool should_hash_expert(const tstring &fn, file_types type);
bool should_hash(const tstring &fn);
bool should_hash(const tstring &fn, file_types &type);

/* file_type returns the file type of a string.
* If an error is found and ocb is provided, send the error to ocb.
Expand Down

0 comments on commit d994b4f

Please sign in to comment.