Skip to content

Commit

Permalink
replaced hsearch which was causing problems on APPLE with custom hash…
Browse files Browse the repository at this point in the history
…table - issue #76
  • Loading branch information
xflouris committed Jul 18, 2017
1 parent 21205f1 commit 724ef82
Show file tree
Hide file tree
Showing 7 changed files with 423 additions and 75 deletions.
4 changes: 3 additions & 1 deletion src/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,6 @@ rtree.c \
svg.c \
svg_landscape.c \
util.c \
utree.c
utree.c \
hash.c \
list.c
72 changes: 34 additions & 38 deletions src/auto.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
Copyright (C) 2015 Tomas Flouri
Copyright (C) 2015-2017 Tomas Flouri
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
Expand Down Expand Up @@ -159,30 +159,6 @@ static int cb_short_trees(rtree_t * node)

}

/* Fill the libc hash table (hcreate/hsearch) with one entry per tip of the
   tree rooted at root: the key is the tip label and the data is the tip node
   itself. The table is created here and is left in place on return. */
static void hash_tips(rtree_t * root)
{
  int tip;
  rtree_t ** tips;

  /* collect pointers to all tip nodes of the tree */
  tips = (rtree_t **)xmalloc((size_t)(root->leaves) * sizeof(rtree_t *));
  rtree_query_tipnodes(root, tips);

  /* request a libc hash table sized for twice the number of tips */
  hcreate(2*(size_t)(root->leaves));

  /* register every tip under its label */
  tip = 0;
  while (tip < root->leaves)
  {
    ENTRY item;

    item.key  = tips[tip]->label;
    item.data = (void *)(tips[tip]);
    hsearch(item, ENTER);

    ++tip;
  }

  /* only the temporary pointer array is released; the nodes stay alive */
  free(tips);
}


static void set_encode_sequence(rtree_t * node,
char * sequence,
long seqlen,
Expand Down Expand Up @@ -211,22 +187,47 @@ static void link_sequences(rtree_t * root, char ** headers, char ** sequence, lo
{
int i;

/* obtain an array of pointers to tip names */
rtree_t ** tipnodes = (rtree_t **)xmalloc((size_t)(root->leaves) *
sizeof(rtree_t *));
rtree_query_tipnodes(root, tipnodes);

/* create a hash table sized for the number of tree tips */
hashtable_t * ht = hashtable_create(root->leaves);

/* populate the hash table with tree tip labels */
for (i = 0; i < root->leaves; ++i)
{
ENTRY query;
// printf("Linking %s\n", headers[i]);
query.key = headers[i];
ENTRY * found = NULL;
pair_t * pair = (pair_t *)xmalloc(sizeof(pair_t));
pair->label = tipnodes[i]->label;
pair->index = i;

found = hsearch(query,FIND);
if (!hashtable_insert(ht,
(void *)pair,
hash_fnv(tipnodes[i]->label),
hashtable_paircmp))
fatal("Duplicate taxon (%s)\n", tipnodes[i]->label);

if (!found)
}

for (i = 0; i < root->leaves; ++i)
{
pair_t * query = hashtable_find(ht,
headers[i],
hash_fnv(headers[i]),
hashtable_paircmp);


if (!query)
fatal("Sequence with header %s does not appear in the tree", headers[i]);

set_encode_sequence((rtree_t *)(found->data), sequence[i], seqlen, pll_map_nt);
set_encode_sequence(tipnodes[query->index], sequence[i], seqlen, pll_map_nt);
}
}

free(tipnodes);

hashtable_destroy(ht,free);
}

static int all_pairwise_dist(rtree_t ** tip_node_list, int tip_list_count, long seqlen)
{
Expand Down Expand Up @@ -263,14 +264,9 @@ void detect_min_bl(rtree_t * rtree)

seqlen = load_fasta(rtree->leaves, headers, seqdata);

hash_tips(rtree);

/* find sequences in hash table and link them with the corresponding taxa */
link_sequences(rtree, headers, seqdata, seqlen);

/* destroy hash table */
hdestroy();

/* get inner nodes that are roots of the largest short subtrees. Short are
such subtrees where all branch lengths within them are less or equal to
opt_subtree_short. The largest such subtrees are those that are not
Expand Down
180 changes: 180 additions & 0 deletions src/hash.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
/*
Copyright (C) 2015-2017 Tomas Flouri
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Contact: Tomas Flouri <[email protected]>,
Heidelberg Institute for Theoretical Studies,
Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
*/

#include "mptp.h"


/* Daniel J. Bernstein 2a hash function (hash * 33 ^ c).

   s is a NUL-terminated string; the terminator itself is not hashed.
   Returns 5381 for the empty string.

   Every character is read as an unsigned char. Casting a plain char directly
   to unsigned long would sign-extend bytes >= 0x80 on platforms where char is
   signed, making the hash value depend on the signedness of char. */
unsigned long hash_djb2a(char * s)
{
  const unsigned char * p = (const unsigned char *)s;
  unsigned long hash = 5381;

  while (*p)
  {
    hash = ((hash << 5) + hash) ^ (unsigned long)(*p); /* hash*33 ^ c */
    ++p;
  }

  return hash;
}

/* Fowler–Noll–Vo 1a hash function.

   s is a NUL-terminated string; the terminator itself is not hashed.
   For every byte the hash is first XOR-ed with the byte and then multiplied
   by the prime. Returns the initial value for the empty string.

   Every character is read as an unsigned char. Casting a plain char directly
   to unsigned long would sign-extend bytes >= 0x80 on platforms where char is
   signed, so the XOR would also flip the upper bits of the hash.

   NOTE: both constants need 64 bits; where unsigned long is narrower they are
   truncated on conversion. */
unsigned long hash_fnv(char * s)
{
  const unsigned char * p = (const unsigned char *)s;
  unsigned long hash = 14695981039346656037UL;

  while (*p)
  {
    hash ^= (unsigned long)(*p);
    hash *= 1099511628211UL;
    ++p;
  }

  return hash;
}

/* Allocate a hash table item that records the hash value (key) together with
   the caller's value pointer. The pointer is stored as given; nothing it
   points to is copied. */
static ht_item_t * hashitem_create(unsigned long key, void * value)
{
  ht_item_t * item;

  item = (ht_item_t *)xmalloc(sizeof(ht_item_t));
  item->value = value;
  item->key = key;

  return item;
}

/* Comparator for NUL-terminated strings: returns 1 if x and y hold equal
   strings and 0 otherwise. */
int hashtable_strcmp(void * x, void * y)
{
  const char * lhs = (const char *)x;
  const char * rhs = (const char *)y;

  return (strcmp(lhs, rhs) == 0) ? 1 : 0;
}

/* Comparator by identity: returns 1 if x and y are the same pointer and 0
   otherwise. The pointed-to data is never inspected. */
int hashtable_ptrcmp(void * x, void * y)
{
  if (x != y)
    return 0;

  return 1;
}

/* Comparator for tables that store pair_t values and are queried by label.

   stored : a pair_t * held in the table
   query  : a NUL-terminated label string (char *), NOT a pair_t *

   Returns 1 if the label of the stored pair equals the query string and 0
   otherwise.

   NOTE(review): the two arguments have different types, so this comparator
   only fits calls where the second argument is a label string. Passing it to
   hashtable_insert together with a pair_t * item makes the query a pair_t *
   that is read here as a string - confirm callers. */
int hashtable_paircmp(void * stored, void * query)
{
  const char * stored_label = ((pair_t *)stored)->label;

  return strcmp(stored_label, (char *)query) == 0;
}

/* Look up an entry in the hash table.

   ht     : table to search; a NULL table (which is what hashtable_create
            returns for zero items) is treated as empty
   x      : query object, handed to cb_cmp as its second argument
   hash   : hash value of the query; it selects the bucket and must equal the
            hash stored with an entry before cb_cmp is consulted
   cb_cmp : called as cb_cmp(stored value, x); non-zero means "match"

   Returns the stored value of the first matching entry in the bucket, or
   NULL if there is none. */
void * hashtable_find(hashtable_t * ht,
                      void * x,
                      unsigned long hash,
                      int (*cb_cmp)(void *, void *))
{
  unsigned long index;
  list_item_t * li;

  if (!ht) return NULL;

  /* table_size is a power of two, so the mask keeps the low bits of hash */
  index = hash & (ht->table_size-1);
  li = (list_item_t *)(ht->entries[index]->head);

  while (li)
  {
    ht_item_t * hi = (ht_item_t *)(li->data);

    /* cheap hash comparison first, full comparison only on equal hashes */
    if ((hash == hi->key) && cb_cmp(hi->value, x))
      return hi->value;

    li = li->next;
  }

  return NULL;
}


hashtable_t * hashtable_create(unsigned long items_count)
{
unsigned long i;
unsigned long size = 1;

if (!items_count) return NULL;

/* compute a size of at least double the items count that is a
multiple of 2 */
items_count <<= 1;
while (size < items_count)
size <<= 1;

/* allocate and init hash table */
hashtable_t * ht = (hashtable_t *)xmalloc(sizeof(hashtable_t));
ht->table_size = size;
ht->entries_count = 0;

/* allocate and init entries array */
ht->entries = (list_t **)xmalloc(size*sizeof(list_t *));
for (i = 0; i < size; ++i)
{
ht->entries[i] = (list_t *)xmalloc(sizeof(list_t));
memset(ht->entries[i], 0, sizeof(list_t));
}

return ht;
}

/* Insert x into the hash table unless a matching entry is already present.

   ht     : table to insert into
   x      : value pointer to store; the table keeps the pointer, not a copy
   hash   : hash value of x, stored alongside it and used to pick the bucket
   cb_cmp : comparator forwarded to hashtable_find for the duplicate check,
            where it is called as cb_cmp(stored value, x)

   Returns 1 if x was inserted and 0 if hashtable_find reported a match.

   NOTE(review): during the duplicate check the comparator's second argument
   is the item being inserted (x), not a separate lookup key, so cb_cmp must
   accept the item type on both sides. */
int hashtable_insert(hashtable_t * ht,
                     void * x,
                     unsigned long hash,
                     int (*cb_cmp)(void *, void *))
{
  list_t * bucket;

  /* refuse duplicates */
  if (hashtable_find(ht, x, hash, cb_cmp) != NULL)
    return 0;

  /* table_size is a power of two, so masking with size-1 selects the bucket */
  bucket = ht->entries[hash & (ht->table_size-1)];
  list_append(bucket, hashitem_create(hash,x));

  ht->entries_count++;

  return 1;
}

/* Release a hash table and everything it owns.

   ht         : table to destroy; NULL is accepted and ignored, because
                hashtable_create returns NULL for zero items
   cb_dealloc : if not NULL, called once on every stored value before the
                table's own bookkeeping is freed; pass NULL when the values
                are owned elsewhere

   The items, list nodes, bucket lists, bucket array and the table itself are
   always freed. */
void hashtable_destroy(hashtable_t * ht, void (*cb_dealloc)(void *))
{
  unsigned long i;

  if (!ht) return;

  for (i = 0; i < ht->table_size; ++i)
  {
    list_t * list = ht->entries[i];

    /* release the caller's values first; the items that point to them are
       freed by list_clear below */
    if (cb_dealloc)
    {
      list_item_t * li;

      for (li = list->head; li; li = li->next)
      {
        ht_item_t * hi = (ht_item_t *)(li->data);
        cb_dealloc(hi->value);
      }
    }

    /* frees every ht_item_t (as list data) and every list node */
    list_clear(list, free);
    free(list);
  }

  free(ht->entries);
  free(ht);
}
98 changes: 98 additions & 0 deletions src/list.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/*
Copyright (C) 2015 Tomas Flouri
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Contact: Tomas Flouri <[email protected]>,
Heidelberg Institute for Theoretical Studies,
Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
*/

#include "mptp.h"

#define DEF_LIST_APPEND 0
#define DEF_LIST_PREPEND 1

/* Add data to a list as a newly allocated list item.

   list  : list to modify; if NULL nothing happens and 0 is returned
   data  : pointer stored in the new item (not copied)
   where : DEF_LIST_APPEND places the item at the tail; any other value
           places it at the head

   An empty list is recognised by its count field being zero; in that case
   the new item becomes both head and tail regardless of where.

   Returns 1 on success, 0 if list is NULL. */
static int list_insert(list_t * list, void * data, int where)
{
  list_item_t * node;

  if (list == NULL)
    return 0;

  node = (list_item_t *)xmalloc(sizeof(list_item_t));
  node->data = data;

  if (list->count == 0)
  {
    /* first element: it is both head and tail */
    node->next = NULL;
    list->head = node;
    list->tail = node;
    list->count = 1;
  }
  else if (where == DEF_LIST_APPEND)
  {
    /* hook the new item behind the current tail */
    node->next = NULL;
    list->tail->next = node;
    list->tail = node;
    list->count++;
  }
  else
  {
    /* the new item becomes the head */
    node->next = list->head;
    list->head = node;
    list->count++;
  }

  return 1;
}

/* Add data as the last element of list. A NULL list is silently ignored,
   since the result of list_insert is discarded. */
void list_append(list_t * list, void * data)
{
  (void)list_insert(list, data, DEF_LIST_APPEND);
}

/* Add data as the first element of list. A NULL list is silently ignored,
   since the result of list_insert is discarded. */
void list_prepend(list_t * list, void * data)
{
  (void)list_insert(list, data, DEF_LIST_PREPEND);
}

/* Remove all items from a list, leaving an empty but still usable list.

   list       : list to empty; NULL is ignored
   cb_dealloc : if not NULL, called on the data pointer of every item before
                the item itself is freed

   The list_t structure itself is not freed. */
void list_clear(list_t * list, void (*cb_dealloc)(void *))
{
  list_item_t * node;
  list_item_t * next;

  if (list == NULL)
    return;

  for (node = list->head; node != NULL; node = next)
  {
    /* remember the successor before the node is released */
    next = node->next;

    if (cb_dealloc != NULL)
      cb_dealloc(node->data);
    free(node);
  }

  list->count = 0;
  list->head = NULL;
  list->tail = NULL;
}

list_t * list_create(void * data)
{
list_t * list = (list_t *)xmalloc(sizeof(list_t));
list->count = 0;
list_append(list, data);

return list;
}
Loading

0 comments on commit 724ef82

Please sign in to comment.