From 642ca5471a4ed6e5a7670fdf4ce56c41129563a7 Mon Sep 17 00:00:00 2001 From: karansingh012 Date: Mon, 26 Jan 2026 22:49:11 +0530 Subject: [PATCH] Make example notebook runnable without external dataset --- .../example_notebook.ipynb | 427 ++++++++++-------- 1 file changed, 234 insertions(+), 193 deletions(-) diff --git a/DeepLense_Regression_Yurii_Halychanskyi/example_notebook.ipynb b/DeepLense_Regression_Yurii_Halychanskyi/example_notebook.ipynb index 55097fe..811548d 100644 --- a/DeepLense_Regression_Yurii_Halychanskyi/example_notebook.ipynb +++ b/DeepLense_Regression_Yurii_Halychanskyi/example_notebook.ipynb @@ -1,43 +1,29 @@ { - "nbformat": 4, - "nbformat_minor": 2, - "metadata": { - "colab": { - "name": "example_notebook.ipynb", - "provenance": [], - "collapsed_sections": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3.8.5 64-bit ('DeepLense-Regression-647Wbmta': pipenv)" - }, - "language_info": { - "name": "python", - "version": "3.8.5" - }, - "accelerator": "GPU", - "interpreter": { - "hash": "94ba651dc4892b9b39eff64f738fac3dc832246874c3417a1cd9874538024232" - } - }, "cells": [ { "cell_type": "markdown", - "source": [ - "# DeepLense Regression\n", - "\n", - "A FastAI-based tool for performing regression on strong lensing images to predict axion mass density of galaxies.\n" - ], "metadata": { "id": "OCqIqbb8Btko", "pycharm": { "name": "#%% md\n" } - } + }, + "source": [ + "# DeepLense Regression\n", + "\n", + "A FastAI-based tool for performing regression on strong lensing images to predict axion mass density of galaxies.\n" + ] }, { "cell_type": "code", "execution_count": null, + "metadata": { + "id": "HluHTYZMKp1Y", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], "source": [ "from fastai.basics import *\n", "from fastai.vision.all import *\n", @@ -62,87 +48,114 @@ "matplotlib.use('Agg')\n", "%matplotlib inline\n", "warnings.filterwarnings('ignore')" - ], - "outputs": [], - "metadata": { - "id": "HluHTYZMKp1Y", - "pycharm": { - "name": "#%%\n" - } - } + ] }, { "cell_type": "markdown", - "source": [ - "## Load the Data" - ], "metadata": { "id": "5k6JCWGNlwrQ", "pycharm": { "name": "#%% md\n" } - } + }, + "source": [ + "## Load the Data" + ] }, { "cell_type": "code", "execution_count": null, - "source": [ - "path_to_images = '/path_to_images.py'\n", - "path_to_masses = '/path_to_masses.py'\n", - "\n", - "image_shape = (150, 150)\n", - "# Number of images\n", - "images_num = 28000\n", - "# Load the dataset\n", - "# Memmap loads images to RAM only when they are used\n", - "images = np.memmap(path_to_images,\n", - " dtype='uint16',\n", - " mode='r',\n", - " shape=(images_num,*images_num))\n", - "\n", - "labels = np.memmap(path_to_masses,\n", - " dtype='float32',\n", - " mode='r',\n", - " shape=(images_num,1))" - ], - "outputs": [], "metadata": { "id": "GDfKapGYl6WV", "pycharm": { "name": "#%%\n" } - } + }, + "outputs": [], + "source": [ + "# --------------------------------------------------\n", + "# Dataset paths (placeholders)\n", + "# --------------------------------------------------\n", + "# path_to_images = '/path_to_images.py'\n", + "# path_to_masses = '/path_to_masses.py'\n", + "\n", + "# NOTE:\n", + "# The full dataset is not included in this repository due to size constraints.\n", + "# Users should provide valid dataset paths or use synthetic data below\n", + "# to test and understand the pipeline.\n", + "\n", + "# --------------------------------------------------\n", + "# Synthetic data (for testing only)\n", + "# 
--------------------------------------------------\n", + "images_num = 100\n", + "image_shape = (150, 150)\n", + "\n", + "images = np.random.randint(\n", + " 0, 65535,\n", + " size=(images_num, 1, *image_shape),\n", + " dtype=np.uint16\n", + ")\n", + "\n", + "labels = np.random.rand(images_num, 1).astype(np.float32)\n", + "\n", + "# --------------------------------------------------\n", + "# Real dataset loading (example only)\n", + "# --------------------------------------------------\n", + "# images_num = 28000\n", + "# image_shape = (150, 150)\n", + "#\n", + "# images = np.memmap(\n", + "# path_to_images,\n", + "# dtype='uint16',\n", + "# mode='r',\n", + "# shape=(images_num, *image_shape)\n", + "# )\n", + "#\n", + "# labels = np.memmap(\n", + "# path_to_masses,\n", + "# dtype='float32',\n", + "# mode='r',\n", + "# shape=(images_num, 1)\n", + "# )" + ] }, { "cell_type": "code", "execution_count": null, - "source": [ - "print(f'Shape of images: {images.shape}')\n", - "print(f'Shape of masses: {labels.shape}')" - ], - "outputs": [], "metadata": { "id": "eCVJbCunnNil", "pycharm": { "name": "#%%\n" } - } + }, + "outputs": [], + "source": [ + "print(f'Shape of images: {images.shape}')\n", + "print(f'Shape of masses: {labels.shape}')" + ] }, { "cell_type": "markdown", - "source": [ - "## Split the data" - ], "metadata": { "id": "XLDBoF21omFn", "pycharm": { "name": "#%% md\n" } - } + }, + "source": [ + "## Split the data" + ] }, { "cell_type": "code", "execution_count": null, + "metadata": { + "id": "lalDUwD-nGPd", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], "source": [ "np.random.seed(234)\n", "num_of_images = labels.shape[0]\n", @@ -153,46 +166,46 @@ "train_indx = permutated_indx[:max_indx_of_train_images]\n", "valid_indx = permutated_indx[max_indx_of_train_images:max_indx_of_valid_images]\n", "test_indx = permutated_indx[max_indx_of_valid_images:max_indx_num_of_test_images]" - ], - "outputs": [], - "metadata": { - "id": "lalDUwD-nGPd", - "pycharm": { - "name": "#%%\n" - } - } + ] }, { "cell_type": "code", "execution_count": null, - "source": [ - "print(f'Number of images in train: {int(num_of_images*0.85)}')\n", - "print(f'Number of images in valid: {int(num_of_images*0.1)}')\n", - "print(f'Number of images in test: {int(num_of_images*0.05)}')" - ], - "outputs": [], "metadata": { "id": "Spf4o2dVoMkS", "pycharm": { "name": "#%%\n" } - } + }, + "outputs": [], + "source": [ + "print(f'Number of images in train: {int(num_of_images*0.85)}')\n", + "print(f'Number of images in valid: {int(num_of_images*0.1)}')\n", + "print(f'Number of images in test: {int(num_of_images*0.05)}')" + ] }, { "cell_type": "markdown", - "source": [ - "## Transforms" - ], "metadata": { "id": "Dw5h65rto2ae", "pycharm": { "name": "#%% md\n" } - } + }, + "source": [ + "## Transforms" + ] }, { "cell_type": "code", "execution_count": null, + "metadata": { + "id": "bTHEkFPCo4PE", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], "source": [ "base_image_transforms = [\n", " transforms.Resize(image_shape)\n", @@ -202,30 +215,30 @@ " transforms.RandomHorizontalFlip(),\n", " transforms.RandomRotation(degrees=(0,360))\n", "]" - ], - "outputs": [], - "metadata": { - "id": "bTHEkFPCo4PE", - "pycharm": { - "name": "#%%\n" - } - } + ] }, { "cell_type": "markdown", - "source": [ - "## Create a Dataset" - ], "metadata": { "id": "9cB-XIh2pAZB", "pycharm": { "name": "#%% md\n" } - } + }, + "source": [ + "## Create a Dataset" + ] }, { "cell_type": "code", "execution_count": null, + "metadata": { + "id": 
"uQ-tssFppDA3", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], "source": [ "train_dataset = RegressionNumpyArrayDataset(images, labels, train_indx,\n", " transforms.Compose(base_image_transforms+rotation_image_transofrms))\n", @@ -233,86 +246,86 @@ " transforms.Compose(base_image_transforms))\n", "test_dataset = RegressionNumpyArrayDataset(images, labels, test_indx, \n", " transforms.Compose(base_image_transforms))" - ], - "outputs": [], - "metadata": { - "id": "uQ-tssFppDA3", - "pycharm": { - "name": "#%%\n" - } - } + ] }, { "cell_type": "markdown", - "source": [ - "## Create a DataLoader" - ], "metadata": { "id": "y00Nc8kupUHB", "pycharm": { "name": "#%% md\n" } - } + }, + "source": [ + "## Create a DataLoader" + ] }, { "cell_type": "code", "execution_count": null, - "source": [ - "device = 'cuda' if torch.cuda.is_available() else 'cpu'\n", - "print(f'Device: {device}')\n", - "batch_size = 64\n", - "dls = DataLoaders.from_dsets(train_dataset,valid_dataset,batch_size=batch_size, device=device, num_workers=2)" - ], - "outputs": [], "metadata": { "id": "Pe1RPdMEpWMQ", "pycharm": { "name": "#%%\n" } - } + }, + "outputs": [], + "source": [ + "device = 'cuda' if torch.cuda.is_available() else 'cpu'\n", + "print(f'Device: {device}')\n", + "batch_size = 64\n", + "dls = DataLoaders.from_dsets(train_dataset,valid_dataset,batch_size=batch_size, device=device, num_workers=2)" + ] }, { "cell_type": "markdown", - "source": [ - "## Model Architecture" - ], "metadata": { "id": "yOaDMkXtpet-", "pycharm": { "name": "#%% md\n" } - } + }, + "source": [ + "## Model Architecture" + ] }, { "cell_type": "code", "execution_count": null, - "source": [ - "torch.manual_seed(50)\n", - "model = xresnet_hybrid101(n_out=1, sa=True, act_cls=Mish_layer, c_in = 1,device=device)" - ], - "outputs": [], "metadata": { "id": "3RB5zMl5pTUq", "pycharm": { "name": "#%%\n" } - } + }, + "outputs": [], + "source": [ + "torch.manual_seed(50)\n", + "model = xresnet_hybrid101(n_out=1, sa=True, act_cls=Mish_layer, c_in = 1,device=device)" + ] }, { "cell_type": "markdown", - "source": [ - "## Create a Learner" - ], "metadata": { "id": "3b6fgS4hp4CJ", "pycharm": { "name": "#%% md\n" } - } + }, + "source": [ + "## Create a Learner" + ] }, { "cell_type": "code", "execution_count": null, + "metadata": { + "id": "tKF1VwgtorhN", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], "source": [ "learn = Learner(\n", " dls, \n", @@ -322,128 +335,129 @@ " metrics=[mae_loss_wgtd],\n", " model_dir = ''\n", ")" - ], - "outputs": [], - "metadata": { - "id": "tKF1VwgtorhN", - "pycharm": { - "name": "#%%\n" - } - } + ] }, { "cell_type": "markdown", - "source": [ - "## Find a Learning Rate" - ], "metadata": { "id": "IX3eOWDfqB6M", "pycharm": { "name": "#%% md\n" } - } + }, + "source": [ + "## Find a Learning Rate" + ] }, { "cell_type": "code", "execution_count": null, - "source": [ - "learn.lr_find()" - ], - "outputs": [], "metadata": { "id": "yfDbyubnqEJR", "pycharm": { "name": "#%%\n" } - } + }, + "outputs": [], + "source": [ + "learn.lr_find()" + ] }, { "cell_type": "code", "execution_count": null, + "metadata": { + "id": "YfvOShQCqANp", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], "source": [ "num_of_epochs = 1\n", "lr = 1e-2\n", + "\n", "learn.fit_one_cycle(num_of_epochs,lr,cbs=\n", " [ShowGraphCallback,\n", " SaveModelCallback(monitor='mae_loss_wgtd',fname='best_model')])" - ], - "outputs": [], - "metadata": { - "id": "YfvOShQCqANp", - "pycharm": { - "name": "#%%\n" - } - } + ] }, { "cell_type": "markdown", - 
"source": [ - "## Load the best model" - ], "metadata": { "id": "kWGhVrLSsBwK", "pycharm": { "name": "#%% md\n" } - } + }, + "source": [ + "## Load the best model" + ] }, { "cell_type": "code", "execution_count": null, - "source": [ - "learn.load('best_model',device=device)\n", - "learn.model = learn.model.to(device)" - ], - "outputs": [], "metadata": { "id": "B2jl8XvPjzps", "pycharm": { "name": "#%%\n" } - } + }, + "outputs": [], + "source": [ + "learn.load('best_model',device=device)\n", + "learn.model = learn.model.to(device)" + ] }, { "cell_type": "markdown", - "source": [ - "## Get Predictions for the Test Dataset" - ], "metadata": { "id": "6bQHmnknsgr1", "pycharm": { "name": "#%% md\n" } - } + }, + "source": [ + "## Get Predictions for the Test Dataset" + ] }, { "cell_type": "code", "execution_count": null, - "source": [ - "test_dl = DataLoader(test_dataset, batch_size=1,shuffle=False,device=device)\n", - "m_pred,m_true = learn.get_preds(dl=test_dl,reorder=False)" - ], - "outputs": [], "metadata": { "id": "k9JbpmI0sktm", "pycharm": { "name": "#%%\n" } - } + }, + "outputs": [], + "source": [ + "test_dl = DataLoader(test_dataset, batch_size=1,shuffle=False,device=device)\n", + "m_pred,m_true = learn.get_preds(dl=test_dl,reorder=False)" + ] }, { "cell_type": "markdown", - "source": [ - "## Plot the results" - ], "metadata": { "id": "Ew5vvLeSs0iu", "pycharm": { "name": "#%% md\n" } - } + }, + "source": [ + "## Plot the results" + ] }, { "cell_type": "code", "execution_count": null, + "metadata": { + "id": "SjWckOWpogoW", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], "source": [ "test_mae = mae_loss_wgtd(m_pred,m_true)\n", "plt.figure(figsize=(6,6),dpi=100)\n", @@ -453,14 +467,41 @@ "plt.xlabel('Observed mass')\n", "plt.ylabel('Predicted mass')\n", "plt.text(1,4, 'MAE: {:.4f}'.format(test_mae))" - ], + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "metadata": { - "id": "SjWckOWpogoW", - "pycharm": { - "name": "#%%\n" - } - } + "source": [] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "example_notebook.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" } - ] -} \ No newline at end of file + }, + "nbformat": 4, + "nbformat_minor": 2 +}