added assignment notebooks
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -176,3 +176,5 @@ Cargo.lock
|
|||||||
# MSVC Windows builds of rustc generate these, which store debugging information
|
# MSVC Windows builds of rustc generate these, which store debugging information
|
||||||
*.pdb
|
*.pdb
|
||||||
|
|
||||||
|
|
||||||
|
*.csv
|
||||||
|
|||||||
260
python/Assignment_tutorial_4.ipynb
Normal file
260
python/Assignment_tutorial_4.ipynb
Normal file
@@ -0,0 +1,260 @@
|
|||||||
|
{
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 0,
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"provenance": []
|
||||||
|
},
|
||||||
|
"kernelspec": {
|
||||||
|
"name": "python3",
|
||||||
|
"display_name": "Python 3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"name": "python"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"# Tutorial 4 Assignment - Logistic Regression\n",
|
||||||
|
"\n",
|
||||||
|
"We have provided you with a preprocessed dataset; the first cell will load it and set everything up for you.\n",
|
||||||
|
"The objectives for you to complete are as follows:\n",
|
||||||
|
"1. Code up the commented functions on your own.\n",
|
||||||
|
"2. Every step that you must code is explained in the comments; use them as hints.\n",
|
||||||
|
"\n",
|
||||||
|
"The last cell has the code set up for training the model. We expect each of you to train the model and note down the best accuracy you can achieve, along with the conditions (such as the learning rate and number of iterations) required to achieve it."
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "1k2vhsMVv0Pk"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"!wget -O dataset.csv \"https://docs.google.com/spreadsheets/d/1RNtDIvisrnOmjJxS7aPm-45NtOH3qd5-mgd2bHeSOGA/export?format=csv&gid=1727131321\"\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"from sklearn.preprocessing import StandardScaler\n",
|
||||||
|
"from sklearn.model_selection import train_test_split\n",
|
||||||
|
"\n",
|
||||||
|
"df=pd.read_csv('/content/dataset.csv')\n",
|
||||||
|
"\n",
|
||||||
|
"df.head()\n",
|
||||||
|
"X = df.drop(['RainTomorrow'], axis=1)\n",
|
||||||
|
"y = df['RainTomorrow']\n",
|
||||||
|
"scaler = StandardScaler()\n",
|
||||||
|
"X_scaled = scaler.fit_transform(X)\n",
|
||||||
|
"X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42, stratify=y)\n"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/"
|
||||||
|
},
|
||||||
|
"id": "nj3rHkttqucf",
|
||||||
|
"outputId": "0ff8d346-148a-4dd9-df8c-4696d2a002c3"
|
||||||
|
},
|
||||||
|
"execution_count": null,
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"output_type": "stream",
|
||||||
|
"name": "stdout",
|
||||||
|
"text": [
|
||||||
|
"--2024-08-30 09:47:10-- https://docs.google.com/spreadsheets/d/1RNtDIvisrnOmjJxS7aPm-45NtOH3qd5-mgd2bHeSOGA/export?format=csv&gid=1727131321\n",
|
||||||
|
"Resolving docs.google.com (docs.google.com)... 74.125.132.138, 74.125.132.139, 74.125.132.113, ...\n",
|
||||||
|
"Connecting to docs.google.com (docs.google.com)|74.125.132.138|:443... connected.\n",
|
||||||
|
"HTTP request sent, awaiting response... 307 Temporary Redirect\n",
|
||||||
|
"Location: https://doc-00-c8-sheets.googleusercontent.com/export/54bogvaave6cua4cdnls17ksc4/p30qcagdcmtcqd8ure4jdd8ec0/1725011230000/112261653790527273724/*/1RNtDIvisrnOmjJxS7aPm-45NtOH3qd5-mgd2bHeSOGA?format=csv&gid=1727131321 [following]\n",
|
||||||
|
"Warning: wildcards not supported in HTTP.\n",
|
||||||
|
"--2024-08-30 09:47:10-- https://doc-00-c8-sheets.googleusercontent.com/export/54bogvaave6cua4cdnls17ksc4/p30qcagdcmtcqd8ure4jdd8ec0/1725011230000/112261653790527273724/*/1RNtDIvisrnOmjJxS7aPm-45NtOH3qd5-mgd2bHeSOGA?format=csv&gid=1727131321\n",
|
||||||
|
"Resolving doc-00-c8-sheets.googleusercontent.com (doc-00-c8-sheets.googleusercontent.com)... 74.125.201.132, 2607:f8b0:4001:c01::84\n",
|
||||||
|
"Connecting to doc-00-c8-sheets.googleusercontent.com (doc-00-c8-sheets.googleusercontent.com)|74.125.201.132|:443... connected.\n",
|
||||||
|
"HTTP request sent, awaiting response... 200 OK\n",
|
||||||
|
"Length: unspecified [text/csv]\n",
|
||||||
|
"Saving to: ‘dataset.csv’\n",
|
||||||
|
"\n",
|
||||||
|
"dataset.csv [ <=> ] 12.04M 5.00MB/s in 2.4s \n",
|
||||||
|
"\n",
|
||||||
|
"2024-08-30 09:47:17 (5.00 MB/s) - ‘dataset.csv’ saved [12621972]\n",
|
||||||
|
"\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"# CODE BELOW"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "kiZ4TBA6wFea"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"id": "2A-RJ6Rscyoa"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import numpy as np\n",
|
||||||
|
"\n",
|
||||||
|
"def sigmoid(z):\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" Compute the sigmoid function.\n",
|
||||||
|
"\n",
|
||||||
|
" Parameters:\n",
|
||||||
|
" z : numpy array\n",
|
||||||
|
" Linear combination of weights and input features.\n",
|
||||||
|
"\n",
|
||||||
|
" Returns:\n",
|
||||||
|
" numpy array\n",
|
||||||
|
" Sigmoid of input z.\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" pass\n",
|
||||||
|
"\n",
|
||||||
|
"def initialize_weights(n_features):\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" Initialize weights and bias to zero.\n",
|
||||||
|
"\n",
|
||||||
|
" Parameters:\n",
|
||||||
|
" n_features : int\n",
|
||||||
|
" Number of features in the dataset.\n",
|
||||||
|
"\n",
|
||||||
|
" Returns:\n",
|
||||||
|
" tuple\n",
|
||||||
|
" Initialized weights and bias.\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" # initialize the weights and bias to zero (hint: make sure dimensions are correct)\n",
|
||||||
|
"\n",
|
||||||
|
" return weights, bias\n",
|
||||||
|
"\n",
|
||||||
|
"def compute_cost(y, y_pred):\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" Compute the cost function for logistic regression.\n",
|
||||||
|
"\n",
|
||||||
|
" Parameters:\n",
|
||||||
|
" y : numpy array\n",
|
||||||
|
" Actual labels.\n",
|
||||||
|
" y_pred : numpy array\n",
|
||||||
|
" Predicted probabilities.\n",
|
||||||
|
"\n",
|
||||||
|
" Returns:\n",
|
||||||
|
" float\n",
|
||||||
|
" The cost value.\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" # compute the cost\n",
|
||||||
|
"\n",
|
||||||
|
" return cost\n",
|
||||||
|
"\n",
|
||||||
|
"def compute_gradients(X, y, y_pred):\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" Compute the gradients for weights and bias.\n",
|
||||||
|
"\n",
|
||||||
|
" Parameters:\n",
|
||||||
|
" X : numpy array\n",
|
||||||
|
" Feature matrix.\n",
|
||||||
|
" y : numpy array\n",
|
||||||
|
" Actual labels.\n",
|
||||||
|
" y_pred : numpy array\n",
|
||||||
|
" Predicted probabilities.\n",
|
||||||
|
"\n",
|
||||||
|
" Returns:\n",
|
||||||
|
" tuple\n",
|
||||||
|
" Gradients of weights and bias.\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" m = X.shape[0]\n",
|
||||||
|
"\n",
|
||||||
|
" # compute dw\n",
|
||||||
|
"\n",
|
||||||
|
" # compute db\n",
|
||||||
|
"\n",
|
||||||
|
" return dw, db\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"def optimize(X, y, weights, bias, learning_rate, num_iterations):\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" Perform gradient descent to optimize weights and bias.\n",
|
||||||
|
"\n",
|
||||||
|
" Parameters:\n",
|
||||||
|
" X : numpy array\n",
|
||||||
|
" Feature matrix.\n",
|
||||||
|
" y : numpy array\n",
|
||||||
|
" Actual labels.\n",
|
||||||
|
" weights : numpy array\n",
|
||||||
|
" Weights of the model.\n",
|
||||||
|
" bias : float\n",
|
||||||
|
" Bias of the model.\n",
|
||||||
|
" learning_rate : float\n",
|
||||||
|
" Learning rate for gradient descent.\n",
|
||||||
|
" num_iterations : int\n",
|
||||||
|
" Number of iterations for gradient descent.\n",
|
||||||
|
"\n",
|
||||||
|
" Returns:\n",
|
||||||
|
" tuple\n",
|
||||||
|
" Optimized weights, bias, and the list of costs.\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" costs = []\n",
|
||||||
|
"\n",
|
||||||
|
" for i in range(num_iterations):\n",
|
||||||
|
" # Compute linear model\n",
|
||||||
|
"\n",
|
||||||
|
" # Apply sigmoid function\n",
|
||||||
|
"\n",
|
||||||
|
" # Compute cost\n",
|
||||||
|
"\n",
|
||||||
|
" # Compute gradients\n",
|
||||||
|
"\n",
|
||||||
|
" # Update weights and bias\n",
|
||||||
|
" pass\n",
|
||||||
|
" return weights, bias, costs\n",
|
||||||
|
"\n",
|
||||||
|
"def predict(X, weights, bias):\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" Predict the binary labels for a dataset.\n",
|
||||||
|
"\n",
|
||||||
|
" Parameters:\n",
|
||||||
|
" X : numpy array\n",
|
||||||
|
" Feature matrix.\n",
|
||||||
|
" weights : numpy array\n",
|
||||||
|
" Weights of the model.\n",
|
||||||
|
" bias : float\n",
|
||||||
|
" Bias of the model.\n",
|
||||||
|
"\n",
|
||||||
|
" Returns:\n",
|
||||||
|
" numpy array\n",
|
||||||
|
" Predicted binary labels (0 or 1).\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" z = np.dot(X, weights) + bias\n",
|
||||||
|
" y_pred = sigmoid(z)\n",
|
||||||
|
" predictions = [1 if i > 0.5 else 0 for i in y_pred]\n",
|
||||||
|
" return np.array(predictions)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"# COMPUTE ACCURACY"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "v9rwH83Rwrfk"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"weights, bias,costs = optimize()\n",
|
||||||
|
"y_pred = predict(X_test,weights,bias)\n",
|
||||||
|
"matches = np.sum(y_test == y_pred)\n",
|
||||||
|
"mismatches = np.sum(y_test != y_pred)\n",
|
||||||
|
"print(f\"Accuracy: {matches/(matches+mismatches)}\")"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "eyGEV4mWB-rW"
|
||||||
|
},
|
||||||
|
"execution_count": null,
|
||||||
|
"outputs": []
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
936
python/Tutorial3.ipynb
Normal file
936
python/Tutorial3.ipynb
Normal file
@@ -0,0 +1,936 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"id": "i6sPnhMH0raw"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"# House Price Prediction\n",
|
||||||
|
"\n",
|
||||||
|
"Think of finding the perfect house as a complex journey involving negotiations, research, and decision-making. Now, imagine having a smart guide that helps you navigate through this maze by analyzing data and predicting outcomes. Linear regression is that guide. In this tutorial, we'll explore how linear regression helps us understand and predict relationships in data, just like finding the ideal house by matching features with price. Let's get started and discover how this powerful tool can simplify your data-driven decisions!"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"id": "huwdh4Es3EZ2"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"## Setup\n",
|
||||||
|
"\n",
|
||||||
|
"The House Price Prediction Dataset contains 13 columns (12 input columns plus the target, `SalePrice`):\n",
|
||||||
|
"\n",
|
||||||
|
"| # | Column Name | Description |\n",
|
||||||
|
"|----|----------------|---------------------------------------------------------------------|\n",
|
||||||
|
"| 1 | Id | To count the records. |\n",
|
||||||
|
"| 2 | MSSubClass | Identifies the type of dwelling involved in the sale. |\n",
|
||||||
|
"| 3 | MSZoning | Identifies the general zoning classification of the sale. |\n",
|
||||||
|
"| 4 | LotArea | Lot size in square feet. |\n",
|
||||||
|
"| 5 | LotConfig | Configuration of the lot |\n",
|
||||||
|
"| 6 | BldgType | Type of dwelling |\n",
|
||||||
|
"| 7 | OverallCond | Rates the overall condition of the house |\n",
|
||||||
|
"| 8 | YearBuilt | Original construction year |\n",
|
||||||
|
"| 9 | YearRemodAdd | Remodel date (same as construction date if no remodeling or additions). |\n",
|
||||||
|
"| 10 | Exterior1st | Exterior covering on house |\n",
|
||||||
|
"| 11 | BsmtFinSF2 | Type 2 finished square feet. |\n",
|
||||||
|
"| 12 | TotalBsmtSF | Total square feet of basement area |\n",
|
||||||
|
"| 13 | SalePrice | To be predicted |\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"Run the cell below to download the dataset."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/"
|
||||||
|
},
|
||||||
|
"id": "W5Y7ZPKz0nf8",
|
||||||
|
"outputId": "786a4293-3f63-4065-c005-4d5b11d8c15b"
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"--2024-08-23 11:26:23-- https://docs.google.com/spreadsheets/d/1caaR9pT24GNmq3rDQpMiIMJrmiTGarbs/export?format=csv&id=1caaR9pT24GNmq3rDQpMiIMJrmiTGarbs&gid=1150341366\n",
|
||||||
|
"Resolving docs.google.com (docs.google.com)... 172.217.164.14, 2607:f8b0:4025:803::200e\n",
|
||||||
|
"Connecting to docs.google.com (docs.google.com)|172.217.164.14|:443... connected.\n",
|
||||||
|
"HTTP request sent, awaiting response... 307 Temporary Redirect\n",
|
||||||
|
"Location: https://doc-08-30-sheets.googleusercontent.com/export/54bogvaave6cua4cdnls17ksc4/pq9qv18vh410aflmm0u38bfc4o/1724412380000/115253717745408081083/*/1caaR9pT24GNmq3rDQpMiIMJrmiTGarbs?format=csv&id=1caaR9pT24GNmq3rDQpMiIMJrmiTGarbs&gid=1150341366 [following]\n",
|
||||||
|
"Warning: wildcards not supported in HTTP.\n",
|
||||||
|
"--2024-08-23 11:26:24-- https://doc-08-30-sheets.googleusercontent.com/export/54bogvaave6cua4cdnls17ksc4/pq9qv18vh410aflmm0u38bfc4o/1724412380000/115253717745408081083/*/1caaR9pT24GNmq3rDQpMiIMJrmiTGarbs?format=csv&id=1caaR9pT24GNmq3rDQpMiIMJrmiTGarbs&gid=1150341366\n",
|
||||||
|
"Resolving doc-08-30-sheets.googleusercontent.com (doc-08-30-sheets.googleusercontent.com)... 172.217.12.1, 2607:f8b0:4025:815::2001\n",
|
||||||
|
"Connecting to doc-08-30-sheets.googleusercontent.com (doc-08-30-sheets.googleusercontent.com)|172.217.12.1|:443... connected.\n",
|
||||||
|
"HTTP request sent, awaiting response... 200 OK\n",
|
||||||
|
"Length: unspecified [text/csv]\n",
|
||||||
|
"Saving to: ‘dataset.csv’\n",
|
||||||
|
"\n",
|
||||||
|
"dataset.csv [ <=> ] 171.41K --.-KB/s in 0.03s \n",
|
||||||
|
"\n",
|
||||||
|
"2024-08-23 11:26:24 (5.15 MB/s) - ‘dataset.csv’ saved [175524]\n",
|
||||||
|
"\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"!wget -O dataset.csv \"https://docs.google.com/spreadsheets/d/1caaR9pT24GNmq3rDQpMiIMJrmiTGarbs/export?format=csv&id=1caaR9pT24GNmq3rDQpMiIMJrmiTGarbs&gid=1150341366\""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"id": "-r572AXndHZn"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"### Importing libraries"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"id": "Vrb6A7dQdGK2"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"from sklearn.preprocessing import OneHotEncoder\n",
|
||||||
|
"from sklearn.preprocessing import MinMaxScaler\n",
|
||||||
|
"from sklearn.model_selection import train_test_split\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"from sklearn.metrics import mean_squared_error\n",
|
||||||
|
"from sklearn.metrics import mean_absolute_error\n",
|
||||||
|
"import matplotlib.pyplot as plt"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"id": "KLdgiCGJ9dN-"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"### Create DataFrame"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"id": "NhdMHsM4831s"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"file_path = '/content/dataset.csv'\n",
|
||||||
|
"df = pd.read_csv(file_path)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"id": "KtYExjQRHBQo"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"# Preprocessing"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"id": "x7n5GmAWwoO_"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df.drop(['Id'],axis=1,inplace=True)\n",
|
||||||
|
"df['SalePrice'].fillna(df['SalePrice'].mean(), inplace=True)\n",
|
||||||
|
"df_copy=df.copy()\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"id": "2LrDmZryG9Pm"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"new_data = df_copy.dropna()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/"
|
||||||
|
},
|
||||||
|
"id": "Q0PeWhNYHTdl",
|
||||||
|
"outputId": "f3988361-0a4c-4155-dbf2-d852e1d90ccd"
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Categorical variables:\n",
|
||||||
|
"['MSZoning', 'LotConfig', 'BldgType', 'Exterior1st']\n",
|
||||||
|
"No. of. categorical features: 4\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"s = (new_data.dtypes == 'object')\n",
|
||||||
|
"object_cols = list(s[s].index)\n",
|
||||||
|
"print(\"Categorical variables:\")\n",
|
||||||
|
"print(object_cols)\n",
|
||||||
|
"print('No. of. categorical features: ',\n",
|
||||||
|
"\tlen(object_cols))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"id": "qzkrtVtEd2ab"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"### One Hot Encoding"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/",
|
||||||
|
"height": 307
|
||||||
|
},
|
||||||
|
"id": "HwH4nB9QILS8",
|
||||||
|
"outputId": "a1ccab90-5d6d-4c09-b5b8-c5d9a9b831a8"
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stderr",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"/usr/local/lib/python3.10/dist-packages/sklearn/preprocessing/_encoders.py:975: FutureWarning: `sparse` was renamed to `sparse_output` in version 1.2 and will be removed in 1.4. `sparse_output` is ignored unless you leave `sparse` to its default value.\n",
|
||||||
|
" warnings.warn(\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"application/vnd.google.colaboratory.intrinsic+json": {
|
||||||
|
"type": "dataframe",
|
||||||
|
"variable_name": "df_final"
|
||||||
|
},
|
||||||
|
"text/html": [
|
||||||
|
"\n",
|
||||||
|
" <div id=\"df-0f237bc8-b111-4549-9ccd-7f7bc7b2cbcf\" class=\"colab-df-container\">\n",
|
||||||
|
" <div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>MSSubClass</th>\n",
|
||||||
|
" <th>LotArea</th>\n",
|
||||||
|
" <th>OverallCond</th>\n",
|
||||||
|
" <th>YearBuilt</th>\n",
|
||||||
|
" <th>YearRemodAdd</th>\n",
|
||||||
|
" <th>BsmtFinSF2</th>\n",
|
||||||
|
" <th>TotalBsmtSF</th>\n",
|
||||||
|
" <th>SalePrice</th>\n",
|
||||||
|
" <th>MSZoning_C (all)</th>\n",
|
||||||
|
" <th>MSZoning_FV</th>\n",
|
||||||
|
" <th>...</th>\n",
|
||||||
|
" <th>Exterior1st_CemntBd</th>\n",
|
||||||
|
" <th>Exterior1st_HdBoard</th>\n",
|
||||||
|
" <th>Exterior1st_ImStucc</th>\n",
|
||||||
|
" <th>Exterior1st_MetalSd</th>\n",
|
||||||
|
" <th>Exterior1st_Plywood</th>\n",
|
||||||
|
" <th>Exterior1st_Stone</th>\n",
|
||||||
|
" <th>Exterior1st_Stucco</th>\n",
|
||||||
|
" <th>Exterior1st_VinylSd</th>\n",
|
||||||
|
" <th>Exterior1st_Wd Sdng</th>\n",
|
||||||
|
" <th>Exterior1st_WdShing</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>0</th>\n",
|
||||||
|
" <td>60</td>\n",
|
||||||
|
" <td>8450</td>\n",
|
||||||
|
" <td>5</td>\n",
|
||||||
|
" <td>2003</td>\n",
|
||||||
|
" <td>2003</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>856.0</td>\n",
|
||||||
|
" <td>208500.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>1.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>1</th>\n",
|
||||||
|
" <td>20</td>\n",
|
||||||
|
" <td>9600</td>\n",
|
||||||
|
" <td>8</td>\n",
|
||||||
|
" <td>1976</td>\n",
|
||||||
|
" <td>1976</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>1262.0</td>\n",
|
||||||
|
" <td>181500.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>1.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2</th>\n",
|
||||||
|
" <td>60</td>\n",
|
||||||
|
" <td>11250</td>\n",
|
||||||
|
" <td>5</td>\n",
|
||||||
|
" <td>2001</td>\n",
|
||||||
|
" <td>2002</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>920.0</td>\n",
|
||||||
|
" <td>223500.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>1.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>3</th>\n",
|
||||||
|
" <td>70</td>\n",
|
||||||
|
" <td>9550</td>\n",
|
||||||
|
" <td>5</td>\n",
|
||||||
|
" <td>1915</td>\n",
|
||||||
|
" <td>1970</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>756.0</td>\n",
|
||||||
|
" <td>140000.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>1.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>4</th>\n",
|
||||||
|
" <td>60</td>\n",
|
||||||
|
" <td>14260</td>\n",
|
||||||
|
" <td>5</td>\n",
|
||||||
|
" <td>2000</td>\n",
|
||||||
|
" <td>2000</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>1145.0</td>\n",
|
||||||
|
" <td>250000.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>1.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"<p>5 rows × 38 columns</p>\n",
|
||||||
|
"</div>\n",
|
||||||
|
" <div class=\"colab-df-buttons\">\n",
|
||||||
|
"\n",
|
||||||
|
" <div class=\"colab-df-container\">\n",
|
||||||
|
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-0f237bc8-b111-4549-9ccd-7f7bc7b2cbcf')\"\n",
|
||||||
|
" title=\"Convert this dataframe to an interactive table.\"\n",
|
||||||
|
" style=\"display:none;\">\n",
|
||||||
|
"\n",
|
||||||
|
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
|
||||||
|
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
|
||||||
|
" </svg>\n",
|
||||||
|
" </button>\n",
|
||||||
|
"\n",
|
||||||
|
" <style>\n",
|
||||||
|
" .colab-df-container {\n",
|
||||||
|
" display:flex;\n",
|
||||||
|
" gap: 12px;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .colab-df-convert {\n",
|
||||||
|
" background-color: #E8F0FE;\n",
|
||||||
|
" border: none;\n",
|
||||||
|
" border-radius: 50%;\n",
|
||||||
|
" cursor: pointer;\n",
|
||||||
|
" display: none;\n",
|
||||||
|
" fill: #1967D2;\n",
|
||||||
|
" height: 32px;\n",
|
||||||
|
" padding: 0 0 0 0;\n",
|
||||||
|
" width: 32px;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .colab-df-convert:hover {\n",
|
||||||
|
" background-color: #E2EBFA;\n",
|
||||||
|
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
|
||||||
|
" fill: #174EA6;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .colab-df-buttons div {\n",
|
||||||
|
" margin-bottom: 4px;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" [theme=dark] .colab-df-convert {\n",
|
||||||
|
" background-color: #3B4455;\n",
|
||||||
|
" fill: #D2E3FC;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" [theme=dark] .colab-df-convert:hover {\n",
|
||||||
|
" background-color: #434B5C;\n",
|
||||||
|
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
|
||||||
|
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
|
||||||
|
" fill: #FFFFFF;\n",
|
||||||
|
" }\n",
|
||||||
|
" </style>\n",
|
||||||
|
"\n",
|
||||||
|
" <script>\n",
|
||||||
|
" const buttonEl =\n",
|
||||||
|
" document.querySelector('#df-0f237bc8-b111-4549-9ccd-7f7bc7b2cbcf button.colab-df-convert');\n",
|
||||||
|
" buttonEl.style.display =\n",
|
||||||
|
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
|
||||||
|
"\n",
|
||||||
|
" async function convertToInteractive(key) {\n",
|
||||||
|
" const element = document.querySelector('#df-0f237bc8-b111-4549-9ccd-7f7bc7b2cbcf');\n",
|
||||||
|
" const dataTable =\n",
|
||||||
|
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
|
||||||
|
" [key], {});\n",
|
||||||
|
" if (!dataTable) return;\n",
|
||||||
|
"\n",
|
||||||
|
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
|
||||||
|
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
|
||||||
|
" + ' to learn more about interactive tables.';\n",
|
||||||
|
" element.innerHTML = '';\n",
|
||||||
|
" dataTable['output_type'] = 'display_data';\n",
|
||||||
|
" await google.colab.output.renderOutput(dataTable, element);\n",
|
||||||
|
" const docLink = document.createElement('div');\n",
|
||||||
|
" docLink.innerHTML = docLinkHtml;\n",
|
||||||
|
" element.appendChild(docLink);\n",
|
||||||
|
" }\n",
|
||||||
|
" </script>\n",
|
||||||
|
" </div>\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"<div id=\"df-32d495b9-a579-4bb8-8725-2243d77acadb\">\n",
|
||||||
|
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-32d495b9-a579-4bb8-8725-2243d77acadb')\"\n",
|
||||||
|
" title=\"Suggest charts\"\n",
|
||||||
|
" style=\"display:none;\">\n",
|
||||||
|
"\n",
|
||||||
|
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
|
||||||
|
" width=\"24px\">\n",
|
||||||
|
" <g>\n",
|
||||||
|
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
|
||||||
|
" </g>\n",
|
||||||
|
"</svg>\n",
|
||||||
|
" </button>\n",
|
||||||
|
"\n",
|
||||||
|
"<style>\n",
|
||||||
|
" .colab-df-quickchart {\n",
|
||||||
|
" --bg-color: #E8F0FE;\n",
|
||||||
|
" --fill-color: #1967D2;\n",
|
||||||
|
" --hover-bg-color: #E2EBFA;\n",
|
||||||
|
" --hover-fill-color: #174EA6;\n",
|
||||||
|
" --disabled-fill-color: #AAA;\n",
|
||||||
|
" --disabled-bg-color: #DDD;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" [theme=dark] .colab-df-quickchart {\n",
|
||||||
|
" --bg-color: #3B4455;\n",
|
||||||
|
" --fill-color: #D2E3FC;\n",
|
||||||
|
" --hover-bg-color: #434B5C;\n",
|
||||||
|
" --hover-fill-color: #FFFFFF;\n",
|
||||||
|
" --disabled-bg-color: #3B4455;\n",
|
||||||
|
" --disabled-fill-color: #666;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .colab-df-quickchart {\n",
|
||||||
|
" background-color: var(--bg-color);\n",
|
||||||
|
" border: none;\n",
|
||||||
|
" border-radius: 50%;\n",
|
||||||
|
" cursor: pointer;\n",
|
||||||
|
" display: none;\n",
|
||||||
|
" fill: var(--fill-color);\n",
|
||||||
|
" height: 32px;\n",
|
||||||
|
" padding: 0;\n",
|
||||||
|
" width: 32px;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .colab-df-quickchart:hover {\n",
|
||||||
|
" background-color: var(--hover-bg-color);\n",
|
||||||
|
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
|
||||||
|
" fill: var(--button-hover-fill-color);\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .colab-df-quickchart-complete:disabled,\n",
|
||||||
|
" .colab-df-quickchart-complete:disabled:hover {\n",
|
||||||
|
" background-color: var(--disabled-bg-color);\n",
|
||||||
|
" fill: var(--disabled-fill-color);\n",
|
||||||
|
" box-shadow: none;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .colab-df-spinner {\n",
|
||||||
|
" border: 2px solid var(--fill-color);\n",
|
||||||
|
" border-color: transparent;\n",
|
||||||
|
" border-bottom-color: var(--fill-color);\n",
|
||||||
|
" animation:\n",
|
||||||
|
" spin 1s steps(1) infinite;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" @keyframes spin {\n",
|
||||||
|
" 0% {\n",
|
||||||
|
" border-color: transparent;\n",
|
||||||
|
" border-bottom-color: var(--fill-color);\n",
|
||||||
|
" border-left-color: var(--fill-color);\n",
|
||||||
|
" }\n",
|
||||||
|
" 20% {\n",
|
||||||
|
" border-color: transparent;\n",
|
||||||
|
" border-left-color: var(--fill-color);\n",
|
||||||
|
" border-top-color: var(--fill-color);\n",
|
||||||
|
" }\n",
|
||||||
|
" 30% {\n",
|
||||||
|
" border-color: transparent;\n",
|
||||||
|
" border-left-color: var(--fill-color);\n",
|
||||||
|
" border-top-color: var(--fill-color);\n",
|
||||||
|
" border-right-color: var(--fill-color);\n",
|
||||||
|
" }\n",
|
||||||
|
" 40% {\n",
|
||||||
|
" border-color: transparent;\n",
|
||||||
|
" border-right-color: var(--fill-color);\n",
|
||||||
|
" border-top-color: var(--fill-color);\n",
|
||||||
|
" }\n",
|
||||||
|
" 60% {\n",
|
||||||
|
" border-color: transparent;\n",
|
||||||
|
" border-right-color: var(--fill-color);\n",
|
||||||
|
" }\n",
|
||||||
|
" 80% {\n",
|
||||||
|
" border-color: transparent;\n",
|
||||||
|
" border-right-color: var(--fill-color);\n",
|
||||||
|
" border-bottom-color: var(--fill-color);\n",
|
||||||
|
" }\n",
|
||||||
|
" 90% {\n",
|
||||||
|
" border-color: transparent;\n",
|
||||||
|
" border-bottom-color: var(--fill-color);\n",
|
||||||
|
" }\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"\n",
|
||||||
|
" <script>\n",
|
||||||
|
" async function quickchart(key) {\n",
|
||||||
|
" const quickchartButtonEl =\n",
|
||||||
|
" document.querySelector('#' + key + ' button');\n",
|
||||||
|
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
|
||||||
|
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
|
||||||
|
" try {\n",
|
||||||
|
" const charts = await google.colab.kernel.invokeFunction(\n",
|
||||||
|
" 'suggestCharts', [key], {});\n",
|
||||||
|
" } catch (error) {\n",
|
||||||
|
" console.error('Error during call to suggestCharts:', error);\n",
|
||||||
|
" }\n",
|
||||||
|
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
|
||||||
|
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
|
||||||
|
" }\n",
|
||||||
|
" (() => {\n",
|
||||||
|
" let quickchartButtonEl =\n",
|
||||||
|
" document.querySelector('#df-32d495b9-a579-4bb8-8725-2243d77acadb button');\n",
|
||||||
|
" quickchartButtonEl.style.display =\n",
|
||||||
|
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
|
||||||
|
" })();\n",
|
||||||
|
" </script>\n",
|
||||||
|
"</div>\n",
|
||||||
|
"\n",
|
||||||
|
" </div>\n",
|
||||||
|
" </div>\n"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" MSSubClass LotArea OverallCond YearBuilt YearRemodAdd BsmtFinSF2 \\\n",
|
||||||
|
"0 60 8450 5 2003 2003 0.0 \n",
|
||||||
|
"1 20 9600 8 1976 1976 0.0 \n",
|
||||||
|
"2 60 11250 5 2001 2002 0.0 \n",
|
||||||
|
"3 70 9550 5 1915 1970 0.0 \n",
|
||||||
|
"4 60 14260 5 2000 2000 0.0 \n",
|
||||||
|
"\n",
|
||||||
|
" TotalBsmtSF SalePrice MSZoning_C (all) MSZoning_FV ... \\\n",
|
||||||
|
"0 856.0 208500.0 0.0 0.0 ... \n",
|
||||||
|
"1 1262.0 181500.0 0.0 0.0 ... \n",
|
||||||
|
"2 920.0 223500.0 0.0 0.0 ... \n",
|
||||||
|
"3 756.0 140000.0 0.0 0.0 ... \n",
|
||||||
|
"4 1145.0 250000.0 0.0 0.0 ... \n",
|
||||||
|
"\n",
|
||||||
|
" Exterior1st_CemntBd Exterior1st_HdBoard Exterior1st_ImStucc \\\n",
|
||||||
|
"0 0.0 0.0 0.0 \n",
|
||||||
|
"1 0.0 0.0 0.0 \n",
|
||||||
|
"2 0.0 0.0 0.0 \n",
|
||||||
|
"3 0.0 0.0 0.0 \n",
|
||||||
|
"4 0.0 0.0 0.0 \n",
|
||||||
|
"\n",
|
||||||
|
" Exterior1st_MetalSd Exterior1st_Plywood Exterior1st_Stone \\\n",
|
||||||
|
"0 0.0 0.0 0.0 \n",
|
||||||
|
"1 1.0 0.0 0.0 \n",
|
||||||
|
"2 0.0 0.0 0.0 \n",
|
||||||
|
"3 0.0 0.0 0.0 \n",
|
||||||
|
"4 0.0 0.0 0.0 \n",
|
||||||
|
"\n",
|
||||||
|
" Exterior1st_Stucco Exterior1st_VinylSd Exterior1st_Wd Sdng \\\n",
|
||||||
|
"0 0.0 1.0 0.0 \n",
|
||||||
|
"1 0.0 0.0 0.0 \n",
|
||||||
|
"2 0.0 1.0 0.0 \n",
|
||||||
|
"3 0.0 0.0 1.0 \n",
|
||||||
|
"4 0.0 1.0 0.0 \n",
|
||||||
|
"\n",
|
||||||
|
" Exterior1st_WdShing \n",
|
||||||
|
"0 0.0 \n",
|
||||||
|
"1 0.0 \n",
|
||||||
|
"2 0.0 \n",
|
||||||
|
"3 0.0 \n",
|
||||||
|
"4 0.0 \n",
|
||||||
|
"\n",
|
||||||
|
"[5 rows x 38 columns]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 43,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
"# One-hot encode the categorical columns and append them to the numeric ones.\n",
"# `sparse` was renamed to `sparse_output` in scikit-learn 1.2 and removed in 1.4,\n",
"# so the old keyword raises a TypeError on current releases.\n",
"OH_encoder = OneHotEncoder(sparse_output=False)\n",
"OH_cols = pd.DataFrame(\n",
"    OH_encoder.fit_transform(new_data[object_cols]),\n",
"    index=new_data.index,\n",
"    columns=OH_encoder.get_feature_names_out(),\n",
")\n",
"df_final = pd.concat([new_data.drop(object_cols, axis=1), OH_cols], axis=1)\n",
"df_final.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"id": "eZDFB6FZv1k-"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"### Scaling"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"id": "Ve81LFh9wEs0"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"scaler = MinMaxScaler()\n",
|
||||||
|
"df_normalized = pd.DataFrame(scaler.fit_transform(df_final), columns=df_final.columns)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"id": "TCfu66R_cyAJ"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"# Data splitting"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"id": "wiiNfRQRgR1G"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"X, y = df_normalized.loc[:, df_normalized.columns != 'SalePrice'], df_normalized[['SalePrice']]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"id": "gMCC4I4bgjf2"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/"
|
||||||
|
},
|
||||||
|
"id": "o5k0PNG_pMGY",
|
||||||
|
"outputId": "59927321-5524-4bc6-cf99-b579566c6527"
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"((2330, 37), (583, 37), (2330, 1), (583, 1))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 47,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"X_train.shape, X_test.shape, y_train.shape, y_test.shape"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/"
|
||||||
|
},
|
||||||
|
"id": "yKhUdAyntrv8",
|
||||||
|
"outputId": "d60ccb04-1f08-4cf2-8503-0f5d980322c3"
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"(755000.0, 34900.0)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 48,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"label_max, label_min = df[\"SalePrice\"].max(), df[\"SalePrice\"].min()\n",
|
||||||
|
"label_max,label_min"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"id": "7c7wVOvwtk7V"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"y_reg_test = y_test * (label_max - label_min) + label_min\n",
|
||||||
|
"feature_names=X_train.columns"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"id": "egogzbyuca1N"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"# Model Building"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"id": "jC2CYGELsB-G"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
"def init_params(n_features):\n",
"    \"\"\"Return zero-initialised parameters (w, b) for a linear model.\n",
"\n",
"    The bias is kept as a separate scalar, so the weight array has exactly\n",
"    one row per feature: shape (n_features, 1).\n",
"    \"\"\"\n",
"    return np.zeros((n_features, 1)), 0\n",
|
||||||
|
"\n",
|
||||||
"def log_likelihood(X, y, w, b):\n",
"    \"\"\"Gaussian log-likelihood of y under the linear model np.dot(X, w) + b.\n",
"\n",
"    The noise variance is fixed to 1, which leaves a constant term minus\n",
"    half the sum of squared residuals.\n",
"    \"\"\"\n",
"    n_samples = len(y)\n",
"    errors = y - (np.dot(X, w) + b)\n",
"    constant_term = -0.5 * n_samples * np.log(2 * np.pi)\n",
"    return constant_term - 0.5 * np.sum(errors ** 2)\n",
|
||||||
|
"\n",
|
||||||
"def update(X, y, w, b, n_iterations=400, learning_rate=0.001):\n",
"    \"\"\"Fit (w, b) by gradient ascent on the Gaussian log-likelihood.\n",
"\n",
"    Unit noise variance is assumed, so maximising the log-likelihood is the\n",
"    same as minimising the squared error.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    X : 2-D array of features, one row per sample.\n",
"    y : targets, shaped to match np.dot(X, w).\n",
"    w, b : starting weights and bias (the inputs are not modified).\n",
"    n_iterations : number of gradient steps.\n",
"    learning_rate : step size applied to the averaged gradient.\n",
"\n",
"    Returns\n",
"    -------\n",
"    (w, b) after the final step.\n",
"    \"\"\"\n",
"    n_samples = len(y)\n",
"    for _ in range(n_iterations):\n",
"        residual = y - (np.dot(X, w) + b)\n",
"\n",
"        # Gradient of the log-likelihood, averaged over the samples.\n",
"        dw = np.dot(X.T, residual) / n_samples\n",
"        db = np.sum(residual) / n_samples\n",
"\n",
"        # dw/db point uphill on the likelihood, so the step must ADD them;\n",
"        # subtracting would increase the squared error on every iteration.\n",
"        w = w + learning_rate * dw\n",
"        b = b + learning_rate * db\n",
"\n",
"    return w, b\n",
|
||||||
|
"\n",
|
||||||
"def train_model(X, y):\n",
"    \"\"\"Train the linear model from zero-initialised parameters; returns (w, b).\n",
"\n",
"    Runs `update` with its default iteration count and learning rate.\n",
"    \"\"\"\n",
"    initial_w, initial_b = init_params(X.shape[1])\n",
"    return update(X, y, initial_w, initial_b)\n",
|
||||||
|
"\n",
|
||||||
"def predict(X, w, b):\n",
"    \"\"\"Return the linear model output np.dot(X, w) + b.\"\"\"\n",
"    weighted_sum = np.dot(X, w)\n",
"    return weighted_sum + b"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"id": "3OPBEw2KrAH7"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"W, b = train_model(X_train.values,y_train.values)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"id": "GfLO0M_EvFxU"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"y_pred = predict(X_test,W,b)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"id": "jYP5Zc18vNqm"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
"# Map the min-max scaled predictions back to the SalePrice range under a new\n",
"# name, so re-running this cell does not rescale already rescaled values.\n",
"y_pred_reg = y_pred * (label_max - label_min) + label_min\n",
"mae = mean_absolute_error(y_reg_test, y_pred_reg)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/"
|
||||||
|
},
|
||||||
|
"id": "9XYUL6hpvYTA",
|
||||||
|
"outputId": "422ec562-f641-48d1-ddb6-4211fefd66ad"
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"858342.4210579091"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 54,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"mae"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"id": "PxShUdVuv5jn"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"provenance": []
|
||||||
|
},
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.11.9"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
||||||
@@ -65344,8 +65344,9 @@
|
|||||||
"provenance": []
|
"provenance": []
|
||||||
},
|
},
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3",
|
"display_name": "ai511",
|
||||||
"name": "python3"
|
"language": "python",
|
||||||
|
"name": "ai511"
|
||||||
},
|
},
|
||||||
"language_info": {
|
"language_info": {
|
||||||
"codemirror_mode": {
|
"codemirror_mode": {
|
||||||
@@ -65361,5 +65362,5 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
"nbformat_minor": 0
|
"nbformat_minor": 4
|
||||||
}
|
}
|
||||||
|
|||||||
1932
python/Tutorial_02.ipynb
Normal file
1932
python/Tutorial_02.ipynb
Normal file
File diff suppressed because one or more lines are too long
1316
python/Tutorial_03_Assignment.ipynb
Normal file
1316
python/Tutorial_03_Assignment.ipynb
Normal file
File diff suppressed because one or more lines are too long
88
rust/numbrs/assets/determinantMatrix.c
Normal file
88
rust/numbrs/assets/determinantMatrix.c
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
int m,n; // original matrix dimensions
|
||||||
|
|
||||||
|
int det(int B[m][n]);
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
int determinant;
|
||||||
|
register int row,column;
|
||||||
|
|
||||||
|
printf("Enter rows and columns\n");
|
||||||
|
scanf("%d%d",&m,&n);
|
||||||
|
|
||||||
|
int A[m][n];
|
||||||
|
|
||||||
|
printf("Enter matrix elements\n");
|
||||||
|
|
||||||
|
for(row = 0; row < m; row++)
|
||||||
|
for(column = 0; column < n; column++)
|
||||||
|
scanf("%d",&A[row][column]);
|
||||||
|
|
||||||
|
determinant = det(A);
|
||||||
|
|
||||||
|
printf("determinant = %d \n",determinant);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int det(int B[m][n]) {
|
||||||
|
int row_size = m;
|
||||||
|
int column_size = n;
|
||||||
|
|
||||||
|
if (row_size != column_size) {
|
||||||
|
printf("DimensionError: Operation Not Permitted \n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
else if (row_size == 1)
|
||||||
|
return (B[0][0]);
|
||||||
|
|
||||||
|
else if (row_size == 2)
|
||||||
|
return (B[0][0]*B[1][1] - B[1][0]*B[0][1]);
|
||||||
|
|
||||||
|
else {
|
||||||
|
int minor[row_size-1][column_size-1];
|
||||||
|
int row_minor, column_minor;
|
||||||
|
int firstrow_columnindex;
|
||||||
|
int sum = 0;
|
||||||
|
|
||||||
|
register int row,column;
|
||||||
|
|
||||||
|
// exclude first row and current column
|
||||||
|
for(firstrow_columnindex = 0; firstrow_columnindex < row_size;
|
||||||
|
firstrow_columnindex++) {
|
||||||
|
|
||||||
|
row_minor = 0;
|
||||||
|
|
||||||
|
for(row = 1; row < row_size; row++) {
|
||||||
|
|
||||||
|
column_minor = 0;
|
||||||
|
|
||||||
|
for(column = 0; column < column_size; column++) {
|
||||||
|
if (column == firstrow_columnindex)
|
||||||
|
continue;
|
||||||
|
else
|
||||||
|
minor[row_minor][column_minor] = B[row][column];
|
||||||
|
|
||||||
|
column_minor++;
|
||||||
|
}
|
||||||
|
|
||||||
|
row_minor++;
|
||||||
|
}
|
||||||
|
|
||||||
|
m = row_minor;
|
||||||
|
n = column_minor;
|
||||||
|
|
||||||
|
if (firstrow_columnindex % 2 == 0)
|
||||||
|
sum += B[0][firstrow_columnindex] * det(minor);
|
||||||
|
else
|
||||||
|
sum -= B[0][firstrow_columnindex] * det(minor);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return sum;
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user