From 3368e92dbf4186951d806a61d7220ac23dd590bc Mon Sep 17 00:00:00 2001 From: JeffreyAsuncion Date: Thu, 17 Dec 2020 15:55:02 -0500 Subject: [PATCH 1/2] hash maps --- .../4_HashTable/Lab_2020_12_17.ipynb | 183 ++++++ .../4_HashTable/Lab_2020_12_17_a.ipynb | 561 ++++++++++++++++++ 2 files changed, 744 insertions(+) create mode 100644 data_structures/4_HashTable/Lab_2020_12_17.ipynb create mode 100644 data_structures/4_HashTable/Lab_2020_12_17_a.ipynb diff --git a/data_structures/4_HashTable/Lab_2020_12_17.ipynb b/data_structures/4_HashTable/Lab_2020_12_17.ipynb new file mode 100644 index 0000000..21c4706 --- /dev/null +++ b/data_structures/4_HashTable/Lab_2020_12_17.ipynb @@ -0,0 +1,183 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# List or Array data structure - O(n)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "stock_prices = []\n", + "with open(\"stock_prices.csv\", \"r\") as f:\n", + " for line in f:\n", + " tokens = line.split(',')\n", + " day = tokens[0]\n", + " price = float(tokens[1])\n", + " stock_prices.append([day, price])" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[['march 6', 310.0],\n", + " ['march 7', 340.0],\n", + " ['march 8', 380.0],\n", + " ['march 9', 302.0],\n", + " ['march 10', 297.0],\n", + " ['march 11', 323.0]]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stock_prices" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "302.0\n" + ] + } + ], + "source": [ + "# this is O(n)\n", + "\n", + "for element in stock_prices:\n", + " if element[0] == \"march 9\":\n", + " print(element[1])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Dictionary- Python's use of hash map/tables - O(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "stock_prices = {}\n", + "with open(\"stock_prices.csv\", \"r\") as f:\n", + " for line in f:\n", + " tokens = line.split(',')\n", + " day = tokens[0]\n", + " price = float(tokens[1])\n", + " stock_prices[day] = price" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'march 6': 310.0,\n", + " 'march 7': 340.0,\n", + " 'march 8': 380.0,\n", + " 'march 9': 302.0,\n", + " 'march 10': 297.0,\n", + " 'march 11': 323.0}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stock_prices" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "302.0" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# this is O(1)\n", + " \n", + "stock_prices['march 9']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/data_structures/4_HashTable/Lab_2020_12_17_a.ipynb b/data_structures/4_HashTable/Lab_2020_12_17_a.ipynb new file mode 100644 index 0000000..0c77519 --- /dev/null +++ b/data_structures/4_HashTable/Lab_2020_12_17_a.ipynb @@ -0,0 +1,561 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# using asci numbers to make hash function\n", + "\n", + "def get_hash(key):\n", + " h = 0\n", + " for char in key:\n", + " # ord will return the asci value of the character\n", + " h += ord(char)\n", + " return h % 100" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "97" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# asci values\n", + "ord('a')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "61" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "get_hash('march 28')" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "class HashTable:\n", + " def __init__(self):\n", + " self.MAX = 100\n", + " self.arr = [None for i in range(self.MAX)]\n", + " \n", + " def get_hash(self, key):\n", + " h = 0\n", + " for char in key:\n", + " h += ord(char)\n", + " return h % self.MAX\n", + " \n", + " def add(self, key, val):\n", + " h = self.get_hash(key)\n", + " # assign the hash to the value\n", + " self.arr[h] = val\n", + " \n", + " def get(self, key):\n", + " h = self.get_hash(key)\n", + " return self.arr[h]\n", + " \n", + " # over-rider operator.__getitem(a,b)\n", + " # return the value of a at index b\n", + " # similar to dict access\n", + " def __setitem__(self, key, val): # dict look and feel with def add()\n", + " h = self.get_hash(key)\n", + " # assign the hash to the value\n", + " self.arr[h] = val\n", + "\n", + " def __getitem__(self, key): # dict look and feel with def get()\n", + " h = self.get_hash(key)\n", + " return self.arr[h]\n", + " \n", + " def __delitem__(self, key):\n", + " h = self.get_hash(key)\n", + " self.arr[h] = None\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "t = HashTable()\n", + "t.add('march 6', 130)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " 130,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t.arr" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "130" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t.get('march 6')" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "t = HashTable()\n", + "t['march 6'] = 130\n", + "t['march 1'] = 20\n", + "t['dec 17'] = 27" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[None,\n", + " None,\n", + " None,\n", + " None,\n", + " 20,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " 130,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " 27,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None]" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t.arr" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "27" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t['dec 17']" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "del t['march 6']" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[None,\n", + " None,\n", + " None,\n", + " None,\n", + " 20,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " 27,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None]" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t.arr" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From bc03ac781685e4eecf165aa689ec18f5bb222b84 Mon Sep 17 00:00:00 2001 From: JeffreyAsuncion Date: Fri, 18 Dec 2020 00:59:09 -0500 Subject: [PATCH 2/2] hash maps, hash tables --- .../4_HashTable/Lab_2020_12_17_a.ipynb | 45 ++-- .../Lab_2020_12_18.ipynb | 236 ++++++++++++++++++ 2 files changed, 262 insertions(+), 19 deletions(-) create mode 100644 data_structures/4_HashTable_2_Collisions/Lab_2020_12_18.ipynb diff --git a/data_structures/4_HashTable/Lab_2020_12_17_a.ipynb b/data_structures/4_HashTable/Lab_2020_12_17_a.ipynb index 0c77519..b4830c4 100644 --- a/data_structures/4_HashTable/Lab_2020_12_17_a.ipynb +++ b/data_structures/4_HashTable/Lab_2020_12_17_a.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -18,7 +18,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -27,7 +27,7 @@ "97" ] }, - "execution_count": 2, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -39,7 +39,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -48,7 +48,7 @@ "61" ] }, - "execution_count": 3, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -59,7 +59,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -103,7 +103,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -113,7 +113,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -221,7 +221,7 @@ " None]" ] }, - "execution_count": 21, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -232,7 +232,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -241,7 +241,7 @@ "130" ] }, - "execution_count": 16, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -252,7 +252,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -264,7 +264,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -372,7 +372,7 @@ " None]" ] }, - "execution_count": 28, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -383,7 +383,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -392,7 +392,7 @@ "27" ] }, - "execution_count": 29, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -403,7 +403,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -412,7 +412,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -520,7 +520,7 @@ " None]" ] }, - "execution_count": 31, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -529,6 +529,13 @@ "t.arr" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": null, diff --git a/data_structures/4_HashTable_2_Collisions/Lab_2020_12_18.ipynb b/data_structures/4_HashTable_2_Collisions/Lab_2020_12_18.ipynb new file mode 100644 index 0000000..1f493dc --- /dev/null +++ b/data_structures/4_HashTable_2_Collisions/Lab_2020_12_18.ipynb @@ -0,0 +1,236 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [], + "source": [ + "class HashTable:\n", + " def __init__(self):\n", + " self.MAX = 10\n", + " self.arr = [[] for i in range(self.MAX)]\n", + " \n", + " def get_hash(self, key):\n", + " hash = 0\n", + " for char in key:\n", + " hash += ord(char)\n", + " return hash % self.MAX\n", + " \n", + " def __getitem__(self, key):\n", + " h = self.get_hash(key)\n", + " for element in self.arr[h]:\n", + " if element[0] == key:\n", + " return element[1]\n", + " \n", + " def __setitem__(self, key, val):\n", + " h = self.get_hash(key)\n", + " found = False\n", + " # Iterate thru array\n", + " for idx, element in enumerate(self.arr[h]):\n", + " # find out if the key exists \n", + " if len(element)==2 and element[0]==key:\n", + " self.arr[h][idx] = (key,val)\n", + " found = True\n", + " break\n", + " # key not found in array \n", + " if not found:\n", + " # append to array\n", + " self.arr[h].append((key,val))\n", + "\n", + " def __delitem__(self, key):\n", + " h = self.get_hash(key)\n", + " for index, element in enumerate(self.arr[h]):\n", + " if element[0] == key:\n", + " del self.arr[h][index]" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [], + "source": [ + "t = HashTable()" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [], + "source": [ + "t[\"march 6\"] = 120\n", + "t[\"march 6\"] = 78\n", + "t[\"march 8\"] = 67\n", + "t[\"march 9\"] = 4\n", + "t[\"march 17\"] = 459" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "78" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# this returns a linked list\n", + "t['march 6']" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[[],\n", + " [('march 8', 67)],\n", + " [('march 9', 4)],\n", + " [],\n", + " [],\n", + " [],\n", + " [],\n", + " [],\n", + " [],\n", + " [('march 6', 78), ('march 17', 459)]]" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t.arr" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "459" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t[\"march 17\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [], + "source": [ + "del t[\"march 17\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[[],\n", + " [('march 8', 67)],\n", + " [('march 9', 4)],\n", + " [],\n", + " [],\n", + " [],\n", + " [],\n", + " [],\n", + " [],\n", + " [('march 6', 78)]]" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t.arr" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [], + "source": [ + "del t['march 6']" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[[], [('march 8', 67)], [('march 9', 4)], [], [], [], [], [], [], []]" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t.arr" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}