1- # Required imports to run this file
21import matplotlib .pyplot as plt
32import numpy as np
43
54
def weighted_matrix(
    point: np.ndarray, training_data_x: np.ndarray, bandwidth: float
) -> np.ndarray:
    """
    Calculate the weight of every training sample relative to a query point.

    The result is an (m, m) diagonal matrix, where m is the number of rows of
    ``training_data_x``. Entry ``[j, j]`` is the Gaussian kernel value
    ``exp(-||point - x_j||^2 / (2 * bandwidth^2))``; off-diagonal entries are 0.

    point           --> the x value at which we want to make predictions
    training_data_x --> training samples, one sample per row
    bandwidth       --> kernel width (often called tau)

    >>> weighted_matrix(
    ...     np.array([1., 1.]),
    ...     np.array([[16.99, 10.34], [21.01,23.68], [24.59,25.69]]),
    ...     0.6
    ... )
    array([[1.43807972e-207, 0.00000000e+000, 0.00000000e+000],
           [0.00000000e+000, 0.00000000e+000, 0.00000000e+000],
           [0.00000000e+000, 0.00000000e+000, 0.00000000e+000]])
    """
    m, _ = np.shape(training_data_x)  # m is the number of training samples
    weights = np.eye(m)  # start from the identity; only the diagonal is filled

    # calculating weights for all training examples [x(i)'s]
    for j in range(m):
        diff = point - training_data_x[j]
        # squared Euclidean distance scaled by the kernel bandwidth
        weights[j, j] = np.exp(diff @ diff.T / (-2.0 * bandwidth**2))
    return weights
3028
3129
def local_weight(
    point: np.ndarray,
    training_data_x: np.ndarray,
    training_data_y: np.ndarray,
    bandwidth: float,
) -> np.ndarray:
    """
    Calculate the local regression coefficients for a single query point.

    Builds the kernel weight matrix W with ``weighted_matrix`` and returns the
    weighted least-squares solution ``inv(X.T @ W @ X) @ (X.T @ W @ y.T)``.

    >>> local_weight(
    ...     np.array([1., 1.]),
    ...     np.array([[16.99, 10.34], [21.01,23.68], [24.59,25.69]]),
    ...     np.array([[1.01, 1.66, 3.5]]),
    ...     0.6
    ... )
    array([[0.00873174],
           [0.08272556]])
    """
    weight = weighted_matrix(point, training_data_x, bandwidth)
    # NOTE: the explicit inverse and this exact association order are kept on
    # purpose; the documented example inverts a nearly singular matrix, so a
    # different solver could change the printed digits.
    w = np.linalg.inv(training_data_x.T @ (weight @ training_data_x)) @ (
        training_data_x.T @ weight @ training_data_y.T
    )

    return w
4954
5055
def local_weight_regression(
    training_data_x: np.ndarray, training_data_y: np.ndarray, bandwidth: float
) -> np.ndarray:
    """
    Calculate a prediction for every row of the training data.

    For each training sample, a separate set of local coefficients is fitted
    with ``local_weight`` (using that sample as the query point) and applied
    to the sample itself. Returns a 1-D array with one prediction per row.

    >>> local_weight_regression(
    ...     np.array([[16.99, 10.34], [21.01, 23.68], [24.59, 25.69]]),
    ...     np.array([[1.01, 1.66, 3.5]]),
    ...     0.6
    ... )
    array([1.07173261, 1.65970737, 3.50160179])
    """
    m, _ = np.shape(training_data_x)
    ypred = np.zeros(m)

    for i, item in enumerate(training_data_x):
        prediction = item @ local_weight(
            item, training_data_x, training_data_y, bandwidth
        )
        # With 2-D targets the product is a one-element array; extract the
        # scalar explicitly, since implicit conversion of an array with
        # ndim > 0 to a scalar is deprecated in recent NumPy releases.
        ypred[i] = prediction.item()

    return ypred
6977
7078
def load_data(
    dataset_name: str, cola_name: str, colb_name: str
) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """
    Load a seaborn dataset and split it into x and y points.

    Returns a 4-tuple ``(training_data_x, mcol_b, col_a, col_b)`` where
    ``training_data_x`` pairs a column of ones with the values of column
    ``cola_name``, ``mcol_b`` is a copy of column ``colb_name``, and
    ``col_a`` / ``col_b`` are the two columns as arrays.
    """
    # imported locally so seaborn is only needed when data is actually loaded
    import seaborn as sns

    data = sns.load_dataset(dataset_name)
    col_a = np.array(data[cola_name])  # total_bill
    col_b = np.array(data[colb_name])  # tip

    mcol_a = col_a.copy()
    mcol_b = col_b.copy()

    one = np.ones(np.shape(mcol_b)[0], dtype=int)

    # pairing elements of one and mcol_a
    training_data_x = np.column_stack((one, mcol_a))

    return training_data_x, mcol_b, col_a, col_b
91100
92101
def get_preds(
    training_data_x: np.ndarray, mcol_b: np.ndarray, tau: float
) -> np.ndarray:
    """
    Get predictions for each row of the training data.

    Thin wrapper that passes ``tau`` as the bandwidth to
    ``local_weight_regression``.

    >>> get_preds(
    ...     np.array([[16.99, 10.34], [21.01, 23.68], [24.59, 25.69]]),
    ...     np.array([[1.01, 1.66, 3.5]]),
    ...     0.6
    ... )
    array([1.07173261, 1.65970737, 3.50160179])
    """
    return local_weight_regression(training_data_x, mcol_b, tau)
102114
103115
104116def plot_preds (
105- training_data_x : np .mat ,
106- predictions : np .ndarray ,
107- col_x : np .ndarray ,
108- col_y : np .ndarray ,
117+ training_data_x : np .array ,
118+ predictions : np .array ,
119+ col_x : np .array ,
120+ col_y : np .array ,
109121 cola_name : str ,
110122 colb_name : str ,
111123) -> plt .plot :
112124 """
113- This function used to plot predictions and display the graph
125+ Plot predictions and display the graph
114126 """
115127 xsort = training_data_x .copy ()
116128 xsort .sort (axis = 0 )
@@ -128,6 +140,10 @@ def plot_preds(
128140
129141
if __name__ == "__main__":
    import doctest

    # run the examples embedded in the docstrings before the demo
    doctest.testmod()

    # demo: fit and plot tip against total_bill from the seaborn "tips" dataset
    training_data_x, mcol_b, col_a, col_b = load_data("tips", "total_bill", "tip")
    predictions = get_preds(training_data_x, mcol_b, 0.5)
    plot_preds(training_data_x, predictions, col_a, col_b, "total_bill", "tip")
0 commit comments