path: root/src/hash.c



/*
 * =====================================================================================
 *
 *       Filename:  hash.c
 *
 *    Description:  Implementation of a basic hashmap 
 *
 *        Version:  1.0
 *        Created:  12/09/2022 07:24:38 PM
 *       Revision:  none
 *       Compiler:  gcc
 *
 *         Author:  YOUR NAME (), *   Organization:  
 *
 * =====================================================================================
 */
#include <stdlib.h>
#include <math.h>
#include "hash.h"
#include "clog.h"


/*-----------------------------------------------------------------------------
 * We're defining the structs inside the .c file because really no one should be
 * messing around with the internals except through the exported functions
 *-----------------------------------------------------------------------------*/

typedef struct _map_element {
    int key;
    int is_used;
    any_type value;
} map_element;

typedef struct _map {
    int max_size;
    int cur_size;
    map_element* data;
} map;


/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  map_init
 *  Description:  Initializes a new hashmap
 * =====================================================================================
 */
hash_map map_init () {
    map* m = (map*) malloc(sizeof(map));

    m->data = (map_element*) calloc(512, sizeof(map_element));

    m->max_size = 512;
    m->cur_size = 0;
    return m;
}		/* -----  end of function map_init  ----- */


/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  hash_int
 *  Description:  
 * =====================================================================================
 */
unsigned int hash_int (map* ma, unsigned int key) {

    /*-----------------------------------------------------------------------------
     * This is a pretty basic hashing function, basically we're still using
     * modulo but in such a way that collisions are less likely than if we just
     * used it raw.
     *
     * The advantage of this is that it doesn't matter the max size of the
     * hashmap, it doesn't have to be sufficiently removed from a power of 2
     *
     * See: Hashing by Multiplication here: https://en.wikipedia.org/wiki/Hash_table#Hash_function
     *-----------------------------------------------------------------------------*/
    return fmodf(floor(ma->max_size * (key * fmodf(0.3, 1.0))), ma->max_size); 
}		/* -----  end of function hash_int  ----- */


/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  hash_get_idx
 *  Description:  
 * =====================================================================================
 */
int hash_get_idx (hash_map* m, int key) {
    int c;
    map* ma = (map*) m;

    if (ma->cur_size == ma->max_size) {
        // Hash map is full, we need to exit now
        return -2;
    }

    c = hash_int(ma, key);                      /* This is the theoretically ideal hash index */

    /*-----------------------------------------------------------------------------
     * Pretty basic collision resolution. Needs to be able to resolve them
     *-----------------------------------------------------------------------------*/

    for(int i = 0; i < ma->max_size; i++) {
        if (ma->data[c].is_used == 0) {
            /*-----------------------------------------------------------------------------
             * This index isn't being used, we can return it 
             *-----------------------------------------------------------------------------*/
            return c;
        }

        if (ma->data[c].key == key && ma->data[c].is_used == 1) {

            /*-----------------------------------------------------------------------------
             * This index is being used, but by this key, so we can return it 
             *-----------------------------------------------------------------------------*/
            return c;
        }


        /*-----------------------------------------------------------------------------
         * This index is being used but not by this key. Increment the index
         * modulo the map size and try again
         *-----------------------------------------------------------------------------*/
        c = (c + 1) % ma->max_size;
    }
    return -2;
}		/* -----  end of function hash_get_idx  ----- */


/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  map_resize
 *  Description:  
 * =====================================================================================
 */
int map_resize(hash_map in) {
    int i;
    int old_size;
    map_element* cur;

    /*  New elements! */
    map *m = (map*) in;
    map_element* tmp = (map_element*) calloc(2 * m->max_size, sizeof(map_element));
    if (!tmp) return 4;                         /* We're out of memory! */

    cur = m->data;
    m->data = tmp;

    old_size = m->max_size;
    m->max_size = 2 * old_size;
    m->cur_size = 0;

    /*  Rehash */
    for (i = 0; i < old_size; i++) {
        int status = map_add(m, cur[i].key, cur[i].value);
        if (status != 0) {
            return status;
        }
    }

    free(cur);
    return 0;
}		/* -----  end of function map_resize  ----- */

/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  map_add
 *  Description:  
 * =====================================================================================
 */
int map_add(hash_map m, int key, any_type val) {
    map* ma = (map*) m; 

    int h = hash_get_idx(m, key);

    while (h == -2) {
        if (map_resize(m) == 4) {
            return 4;
        }
        h = hash_get_idx(m, key);
    }

    ma->data[h].value = val;
    ma->data[h].key = key;
    ma->data[h].is_used = 1;
    return 0;
}		/* -----  end of function map_add  ----- */


/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  map_get
 *  Description:  
 * =====================================================================================
 */
int map_get (hash_map m, int key, any_type* val) {
    map* ma = (map*) m;

    int h = hash_get_idx(m, key);

    if (ma->data[h].is_used == 1 && ma->data[h].key == key) {
        *val = ma->data[h].value;
        return 0;
    }
    return -3;
}		/* -----  end of function map_get  ----- */


/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  map_del
 *  Description:  
 * =====================================================================================
 */
int map_del (hash_map m, int key) {
  map* ma = (map*) m;

  int h = hash_get_idx(m, key);

  if (ma->data[h].is_used == 1 && ma->data[h].key == key) {
    ma->data[h].is_used = 0;
    ma->data[h].key = 0;
  }

  return 0;
}		/* -----  end of function map_del  ----- */

/*
 *map_free cleans up the memory used by a hashmap
 */
void map_free(hash_map m) {
  map* ma = (map*) m;

  free(ma->data);
  free(ma);
}

/*
 *map_len returns the current length of the hashmap
 */
int map_len(hash_map m) {
  map* ma = (map*) m;

  return ma->cur_size;
}
/*
 * =====================================================================================
 *
 *       Filename:  hash.c
 *
 *    Description:  Implementation of a basic hashmap 
 *
 *        Version:  1.0
 *        Created:  12/09/2022 07:24:38 PM
 *       Revision:  none
 *       Compiler:  gcc
 *
 *         Author:  YOUR NAME (), *   Organization:  
 *
 * =====================================================================================
 */
#include <stdlib.h>
#include <math.h>
#include "hash.h"
#include "clog.h"


/*-----------------------------------------------------------------------------
 * We're defining the structs inside the .c file because really no one should be
 * messing around with the internals except through the exported functions
 *-----------------------------------------------------------------------------*/

typedef struct _map_element {
    int key;
    int is_used;
    any_type value;
} map_element;

typedef struct _map {
    int max_size;
    int cur_size;
    map_element* data;
} map;


/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  map_init
 *  Description:  Initializes a new hashmap
 * =====================================================================================
 */
hash_map map_init () {
    map* m = (map*) malloc(sizeof(map));

    m->data = (map_element*) calloc(512, sizeof(map_element));

    m->max_size = 512;
    m->cur_size = 0;
    return m;
}		/* -----  end of function map_init  ----- */


/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  hash_int
 *  Description:  
 * =====================================================================================
 */
unsigned int hash_int (map* ma, unsigned int key) {

    /*-----------------------------------------------------------------------------
     * This is a pretty basic hashing function, basically we're still using
     * modulo but in such a way that collisions are less likely than if we just
     * used it raw.
     *
     * The advantage of this is that it doesn't matter the max size of the
     * hashmap, it doesn't have to be sufficiently removed from a power of 2
     *
     * See: Hashing by Multiplication here: https://en.wikipedia.org/wiki/Hash_table#Hash_function
     *-----------------------------------------------------------------------------*/
    return fmodf(floor(ma->max_size * (key * fmodf(0.3, 1.0))), ma->max_size); 
}		/* -----  end of function hash_int  ----- */


/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  hash_get_idx
 *  Description:  
 * =====================================================================================
 */
int hash_get_idx (hash_map* m, int key) {
    int c;
    map* ma = (map*) m;

    if (ma->cur_size == ma->max_size) {
        // Hash map is full, we need to exit now
        return -2;
    }

    c = hash_int(ma, key);                      /* This is the theoretically ideal hash index */

    /*-----------------------------------------------------------------------------
     * Pretty basic collision resolution. Needs to be able to resolve them
     *-----------------------------------------------------------------------------*/

    for(int i = 0; i < ma->max_size; i++) {
        if (ma->data[c].is_used == 0) {
            /*-----------------------------------------------------------------------------
             * This index isn't being used, we can return it 
             *-----------------------------------------------------------------------------*/
            return c;
        }

        if (ma->data[c].key == key && ma->data[c].is_used == 1) {

            /*-----------------------------------------------------------------------------
             * This index is being used, but by this key, so we can return it 
             *-----------------------------------------------------------------------------*/
            return c;
        }


        /*-----------------------------------------------------------------------------
         * This index is being used but not by this key. Increment the index
         * modulo the map size and try again
         *-----------------------------------------------------------------------------*/
        c = (c + 1) % ma->max_size;
    }
    return -2;
}		/* -----  end of function hash_get_idx  ----- */


/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  map_resize
 *  Description:  
 * =====================================================================================
 */
int map_resize(hash_map in) {
    int i;
    int old_size;
    map_element* cur;

    /*  New elements! */
    map *m = (map*) in;
    map_element* tmp = (map_element*) calloc(2 * m->max_size, sizeof(map_element));
    if (!tmp) return 4;                         /* We're out of memory! */

    cur = m->data;
    m->data = tmp;

    old_size = m->max_size;
    m->max_size = 2 * old_size;
    m->cur_size = 0;

    /*  Rehash */
    for (i = 0; i < old_size; i++) {
        int status = map_add(m, cur[i].key, cur[i].value);
        if (status != 0) {
            return status;
        }
    }

    free(cur);
    return 0;
}		/* -----  end of function map_resize  ----- */

/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  map_add
 *  Description:  
 * =====================================================================================
 */
int map_add(hash_map m, int key, any_type val) {
    map* ma = (map*) m; 

    int h = hash_get_idx(m, key);

    while (h == -2) {
        if (map_resize(m) == 4) {
            return 4;
        }
        h = hash_get_idx(m, key);
    }

    ma->data[h].value = val;
    ma->data[h].key = key;
    ma->data[h].is_used = 1;
    return 0;
}		/* -----  end of function map_add  ----- */


/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  map_get
 *  Description:  
 * =====================================================================================
 */
int map_get (hash_map m, int key, any_type* val) {
    map* ma = (map*) m;

    int h = hash_get_idx(m, key);

    if (ma->data[h].is_used == 1 && ma->data[h].key == key) {
        *val = ma->data[h].value;
        return 0;
    }
    return -3;
}		/* -----  end of function map_get  ----- */


/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  map_del
 *  Description:  
 * =====================================================================================
 */
int map_del (hash_map m, int key) {
  map* ma = (map*) m;

  int h = hash_get_idx(m, key);

  if (ma->data[h].is_used == 1 && ma->data[h].key == key) {
    ma->data[h].is_used = 0;
    ma->data[h].key = 0;
  }

  return 0;
}		/* -----  end of function map_del  ----- */

/*
 *map_free cleans up the memory used by a hashmap
 */
void map_free(hash_map m) {
  map* ma = (map*) m;

  free(ma->data);
  free(ma);
}

/*
 *map_len returns the current length of the hashmap
 */
int map_len(hash_map m) {
  map* ma = (map*) m;

  return ma->cur_size;
}