diff --git a/Clustering Techniques/Affinity-Propagation-Clustering-ALgorithm.py b/Clustering Techniques/Affinity-Propagation-Clustering-ALgorithm.py
new file mode 100644
index 0000000..f111ff9
--- /dev/null
+++ b/Clustering Techniques/Affinity-Propagation-Clustering-ALgorithm.py
@@ -0,0 +1,39 @@
+"""Cluster a synthetic 2-D data set with Affinity Propagation and plot it."""
+
+from numpy import unique
+from numpy import where
+from matplotlib import pyplot
+from sklearn.datasets import make_classification
+from sklearn.cluster import AffinityPropagation
+
+# initialize the data set we'll work with
+training_data, _ = make_classification(
+    n_samples=1000,
+    n_features=2,
+    n_informative=2,
+    n_redundant=0,
+    n_clusters_per_class=1,
+    random_state=4
+)
+
+# define the model
+model = AffinityPropagation(damping=0.7)
+
+# train the model
+model.fit(training_data)
+
+# assign each data point to a cluster
+result = model.predict(training_data)
+
+# get all of the unique clusters
+clusters = unique(result)
+
+# plot the clusters
+for cluster in clusters:
+    # get data points that fall in this cluster
+    index = where(result == cluster)
+    # make the plot
+    pyplot.scatter(training_data[index, 0], training_data[index, 1])
+
+# show the plot
+pyplot.show()
diff --git a/Clustering Techniques/Agglomerative-Clustering-Algorithm.py b/Clustering Techniques/Agglomerative-Clustering-Algorithm.py
new file mode 100644
index 0000000..9786ab7
--- /dev/null
+++ b/Clustering Techniques/Agglomerative-Clustering-Algorithm.py
@@ -0,0 +1,36 @@
+"""Cluster a synthetic 2-D data set with agglomerative clustering and plot it."""
+
+from numpy import unique
+from numpy import where
+from matplotlib import pyplot
+from sklearn.datasets import make_classification
+from sklearn.cluster import AgglomerativeClustering
+
+# initialize the data set we'll work with
+training_data, _ = make_classification(
+    n_samples=1000,
+    n_features=2,
+    n_informative=2,
+    n_redundant=0,
+    n_clusters_per_class=1,
+    random_state=4
+)
+
+# define the model
+agglomerative_model = AgglomerativeClustering(n_clusters=2)
+
+# assign each data point to a cluster
+agglomerative_result = agglomerative_model.fit_predict(training_data)
+
+# get all of the unique clusters
+agglomerative_clusters = unique(agglomerative_result)
+
+# plot the clusters
+for agglomerative_cluster in agglomerative_clusters:
+    # get data points labelled with the current cluster (the loop variable)
+    index = where(agglomerative_result == agglomerative_cluster)
+    # make the plot
+    pyplot.scatter(training_data[index, 0], training_data[index, 1])
+
+# show the Agglomerative Hierarchy plot
+pyplot.show()
diff --git a/Clustering Techniques/BIRCH-Algorithm.py b/Clustering Techniques/BIRCH-Algorithm.py
new file mode 100644
index 0000000..c209678
--- /dev/null
+++ b/Clustering Techniques/BIRCH-Algorithm.py
@@ -0,0 +1,39 @@
+"""Cluster a synthetic 2-D data set with BIRCH and plot the result."""
+
+from numpy import unique
+from numpy import where
+from matplotlib import pyplot
+from sklearn.datasets import make_classification
+from sklearn.cluster import Birch
+
+# initialize the data set we'll work with
+training_data, _ = make_classification(
+    n_samples=1000,
+    n_features=2,
+    n_informative=2,
+    n_redundant=0,
+    n_clusters_per_class=1,
+    random_state=4
+)
+
+# define the model
+birch_model = Birch(threshold=0.03, n_clusters=2)
+
+# train the model
+birch_model.fit(training_data)
+
+# assign each data point to a cluster
+birch_result = birch_model.predict(training_data)
+
+# get all of the unique clusters
+birch_clusters = unique(birch_result)
+
+# plot the BIRCH clusters
+for birch_cluster in birch_clusters:
+    # get data points labelled with the current cluster (the loop variable)
+    index = where(birch_result == birch_cluster)
+    # make the plot
+    pyplot.scatter(training_data[index, 0], training_data[index, 1])
+
+# show the BIRCH plot
+pyplot.show()
diff --git a/Clustering Techniques/DBSCAN-Model.py b/Clustering Techniques/DBSCAN-Model.py
new file mode 100644
index 0000000..06b9af2
--- /dev/null
+++ b/Clustering Techniques/DBSCAN-Model.py
@@ -0,0 +1,37 @@
+"""Cluster a synthetic 2-D data set with DBSCAN and plot the result."""
+
+from numpy import unique
+from numpy import where
+from matplotlib import pyplot
+from sklearn.datasets import make_classification
+from sklearn.cluster import DBSCAN
+
+# initialize the data set we'll work with
+training_data, _ = make_classification(
+    n_samples=1000,
+    n_features=2,
+    n_informative=2,
+    n_redundant=0,
+    n_clusters_per_class=1,
+    random_state=4
+)
+
+# define the model
+dbscan_model = DBSCAN(eps=0.25, min_samples=9)
+
+# fit the model and assign each data point to a cluster in one step
+# (DBSCAN has no predict(); scikit-learn labels noise points as -1)
+dbscan_result = dbscan_model.fit_predict(training_data)
+
+# get all of the unique clusters
+dbscan_clusters = unique(dbscan_result)
+
+# plot the DBSCAN clusters
+for dbscan_cluster in dbscan_clusters:
+    # get data points labelled with the current cluster (the loop variable)
+    index = where(dbscan_result == dbscan_cluster)
+    # make the plot
+    pyplot.scatter(training_data[index, 0], training_data[index, 1])
+
+# show the DBSCAN plot
+pyplot.show()
diff --git a/Clustering Techniques/Gaussain-Mixture-Model.py b/Clustering Techniques/Gaussain-Mixture-Model.py
new file mode 100644
index 0000000..edfcd26
--- /dev/null
+++ b/Clustering Techniques/Gaussain-Mixture-Model.py
@@ -0,0 +1,39 @@
+"""Cluster a synthetic 2-D data set with a Gaussian mixture model and plot it."""
+
+from numpy import unique
+from numpy import where
+from matplotlib import pyplot
+from sklearn.datasets import make_classification
+from sklearn.mixture import GaussianMixture
+
+# initialize the data set we'll work with
+training_data, _ = make_classification(
+    n_samples=1000,
+    n_features=2,
+    n_informative=2,
+    n_redundant=0,
+    n_clusters_per_class=1,
+    random_state=4
+)
+
+# define the model
+gaussian_model = GaussianMixture(n_components=2)
+
+# train the model
+gaussian_model.fit(training_data)
+
+# assign each data point to a cluster
+gaussian_result = gaussian_model.predict(training_data)
+
+# get all of the unique clusters
+gaussian_clusters = unique(gaussian_result)
+
+# plot the Gaussian Mixture clusters
+for gaussian_cluster in gaussian_clusters:
+    # get data points labelled with the current cluster (the loop variable)
+    index = where(gaussian_result == gaussian_cluster)
+    # make the plot
+    pyplot.scatter(training_data[index, 0], training_data[index, 1])
+
+# show the Gaussian Mixture plot
+pyplot.show()
diff --git a/Clustering Techniques/Mean-Shift-Clustering-algorithm.py b/Clustering Techniques/Mean-Shift-Clustering-algorithm.py
new file mode 100644
index 0000000..230c67b
--- /dev/null
+++ b/Clustering Techniques/Mean-Shift-Clustering-algorithm.py
@@ -0,0 +1,36 @@
+"""Cluster a synthetic 2-D data set with Mean-Shift and plot the result."""
+
+from numpy import unique
+from numpy import where
+from matplotlib import pyplot
+from sklearn.datasets import make_classification
+from sklearn.cluster import MeanShift
+
+# initialize the data set we'll work with
+training_data, _ = make_classification(
+    n_samples=1000,
+    n_features=2,
+    n_informative=2,
+    n_redundant=0,
+    n_clusters_per_class=1,
+    random_state=4
+)
+
+# define the model
+mean_model = MeanShift()
+
+# assign each data point to a cluster
+mean_result = mean_model.fit_predict(training_data)
+
+# get all of the unique clusters
+mean_clusters = unique(mean_result)
+
+# plot the Mean-Shift clusters
+for mean_cluster in mean_clusters:
+    # get data points that fall in this cluster
+    index = where(mean_result == mean_cluster)
+    # make the plot
+    pyplot.scatter(training_data[index, 0], training_data[index, 1])
+
+# show the Mean-Shift plot
+pyplot.show()
diff --git a/Clustering Techniques/OPTICS-algorithm.py b/Clustering Techniques/OPTICS-algorithm.py
new file mode 100644
index 0000000..c198fa3
--- /dev/null
+++ b/Clustering Techniques/OPTICS-algorithm.py
@@ -0,0 +1,36 @@
+"""Cluster a synthetic 2-D data set with OPTICS and plot the result."""
+
+from numpy import unique
+from numpy import where
+from matplotlib import pyplot
+from sklearn.datasets import make_classification
+from sklearn.cluster import OPTICS
+
+# initialize the data set we'll work with
+training_data, _ = make_classification(
+    n_samples=1000,
+    n_features=2,
+    n_informative=2,
+    n_redundant=0,
+    n_clusters_per_class=1,
+    random_state=4
+)
+
+# define the model
+optics_model = OPTICS(eps=0.75, min_samples=10)
+
+# assign each data point to a cluster
+optics_result = optics_model.fit_predict(training_data)
+
+# get all of the unique clusters (taken from the labels computed above)
+optics_clusters = unique(optics_result)
+
+# plot the OPTICS clusters
+for optics_cluster in optics_clusters:
+    # get data points labelled with the current cluster (the loop variable)
+    index = where(optics_result == optics_cluster)
+    # make the plot
+    pyplot.scatter(training_data[index, 0], training_data[index, 1])
+
+# show the OPTICS plot
+pyplot.show()
diff --git a/Regression-Techniques/simple-linear-regression.py b/Regression-Techniques/simple-linear-regression.py
new file mode 100644
index 0000000..0f6da43
--- /dev/null
+++ b/Regression-Techniques/simple-linear-regression.py
@@ -0,0 +1,63 @@
+"""Fit a one-variable linear regression by least squares and plot the fit."""
+
+import numpy as np
+import matplotlib.pyplot as plt
+
+
+def estimate_coef(x, y):
+    """Return the least-squares intercept and slope ``(b_0, b_1)`` of y on x."""
+    # number of observations/points
+    n = np.size(x)
+
+    # mean of x and y vector
+    m_x = np.mean(x)
+    m_y = np.mean(y)
+
+    # calculating cross-deviation and deviation about x
+    SS_xy = np.sum(y*x) - n*m_y*m_x
+    SS_xx = np.sum(x*x) - n*m_x*m_x
+
+    # calculating regression coefficients
+    b_1 = SS_xy / SS_xx
+    b_0 = m_y - b_1*m_x
+
+    return (b_0, b_1)
+
+
+def plot_regression_line(x, y, b):
+    """Scatter the observations and draw the line ``b[0] + b[1]*x`` over them."""
+    # plotting the actual points as scatter plot
+    plt.scatter(x, y, color="m",
+                marker="o", s=30)
+
+    # predicted response vector
+    y_pred = b[0] + b[1]*x
+
+    # plotting the regression line
+    plt.plot(x, y_pred, color="g")
+
+    # putting labels
+    plt.xlabel('x')
+    plt.ylabel('y')
+
+    # function to show plot
+    plt.show()
+
+
+def main():
+    """Estimate the coefficients for a small sample data set and plot the fit."""
+    # observations / data
+    x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
+    y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])
+
+    # estimating coefficients
+    b = estimate_coef(x, y)
+    print("Estimated coefficients:\nb_0 = {} \
+          \nb_1 = {}".format(b[0], b[1]))
+
+    # plotting regression line
+    plot_regression_line(x, y, b)
+
+
+if __name__ == "__main__":
+    main()