From 4eb3d40bcaa18edcd912be049a7a80824aecd110 Mon Sep 17 00:00:00 2001
From: Georige <145737474+Georige@users.noreply.github.com>
Date: Sun, 1 Feb 2026 17:56:44 +0800
Subject: [PATCH 1/6] bug
---
ldm/models/autoencoder.py | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/ldm/models/autoencoder.py b/ldm/models/autoencoder.py
index 6a9c4f4..028ebb7 100644
--- a/ldm/models/autoencoder.py
+++ b/ldm/models/autoencoder.py
@@ -3,7 +3,7 @@
import torch.nn.functional as F
from contextlib import contextmanager
-from taming.modules.vqvae.quantize import VectorQuantizer2 as VectorQuantizer
+from taming.modules.vqvae.quantize import VectorQuantizer
from ldm.modules.diffusionmodules.model import Encoder, Decoder
from ldm.modules.distributions.distributions import DiagonalGaussianDistribution
@@ -37,8 +37,7 @@ def __init__(self,
self.decoder = Decoder(**ddconfig)
self.loss = instantiate_from_config(lossconfig)
self.quantize = VectorQuantizer(n_embed, embed_dim, beta=0.25,
- remap=remap,
- sane_index_shape=sane_index_shape)
+ )
self.quant_conv = torch.nn.Conv2d(ddconfig["z_channels"], embed_dim, 1)
self.post_quant_conv = torch.nn.Conv2d(embed_dim, ddconfig["z_channels"], 1)
if colorize_nlabels is not None:
@@ -76,7 +75,7 @@ def ema_scope(self, context=None):
print(f"{context}: Restored training weights")
def init_from_ckpt(self, path, ignore_keys=list()):
- sd = torch.load(path, map_location="cpu")["state_dict"]
+ sd = torch.load(path, map_location="cpu",weights_only=False)["state_dict"]
keys = list(sd.keys())
for k in keys:
for ik in ignore_keys:
From 13277b5f95ffb0ca82d89d76433c1872688ed0de Mon Sep 17 00:00:00 2001
From: Georige <145737474+Georige@users.noreply.github.com>
Date: Sun, 1 Feb 2026 17:59:17 +0800
Subject: [PATCH 2/6] Update ddpm.py
---
ldm/models/diffusion/ddpm.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/ldm/models/diffusion/ddpm.py b/ldm/models/diffusion/ddpm.py
index 18383ec..946c9ff 100644
--- a/ldm/models/diffusion/ddpm.py
+++ b/ldm/models/diffusion/ddpm.py
@@ -23,7 +23,7 @@
from ldm.modules.distributions.distributions import normal_kl, DiagonalGaussianDistribution
from ldm.models.autoencoder import VQModelInterface, IdentityFirstStage, AutoencoderKL
from ldm.modules.diffusionmodules.util import make_beta_schedule, extract_into_tensor, noise_like
-from ldm.models.diffusion.ddim import DDIMSampler
+from ldm.models.diffusion.diffstategrad_ddim import DDIMSampler
__conditioning_keys__ = {'concat': 'c_concat',
From c673e33f906cb16081f946f9abcf105ace85fed5 Mon Sep 17 00:00:00 2001
From: Georige <145737474+Georige@users.noreply.github.com>
Date: Sun, 1 Feb 2026 18:03:29 +0800
Subject: [PATCH 3/6] Update diffstategrad_sample_condition.py
---
diffstategrad_sample_condition.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/diffstategrad_sample_condition.py b/diffstategrad_sample_condition.py
index a45ed6f..9c2954f 100644
--- a/diffstategrad_sample_condition.py
+++ b/diffstategrad_sample_condition.py
@@ -144,7 +144,7 @@ def make_folder(sample_path, opt):
parser.add_argument('--ddim_eta', default=0.0, type=float)
parser.add_argument('--n_samples_per_class', default=1, type=int)
parser.add_argument('--ddim_scale', default=1.0, type=float)
-parser.add_argument('--image_id', default=60000, type=int)
+parser.add_argument('--image_id', default=60004, type=int)
parser.add_argument('--var_cutoff', default=0.99, type=float)
parser.add_argument('--pixel_lr', default=1e-2, type=float)
parser.add_argument('--latent_lr', default=5e-3, type=float)
From 18fac33caf85dab6cde6f369e04a0ff74874303f Mon Sep 17 00:00:00 2001
From: Georige <145737474+Georige@users.noreply.github.com>
Date: Sun, 1 Feb 2026 18:06:44 +0800
Subject: [PATCH 4/6] =?UTF-8?q?=E4=BD=BF=E7=94=A8=20Colab=20=E5=88=9B?=
=?UTF-8?q?=E5=BB=BA=E8=80=8C=E6=88=90?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
colab.ipynb | 438 ++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 438 insertions(+)
create mode 100644 colab.ipynb
diff --git a/colab.ipynb b/colab.ipynb
new file mode 100644
index 0000000..1e0027e
--- /dev/null
+++ b/colab.ipynb
@@ -0,0 +1,438 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "view-in-github",
+ "colab_type": "text"
+ },
+ "source": [
+        "<a href=\"https://colab.research.google.com/\" target=\"_parent\">",
+        "<img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "id": "l-fJd1vOXhyE",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "edd98885-16b4-4f3b-fc23-811e81f61f1a"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Cloning into 'DiffStateGrad'...\n",
+ "remote: Enumerating objects: 502, done.\u001b[K\n",
+ "remote: Counting objects: 100% (502/502), done.\u001b[K\n",
+ "remote: Compressing objects: 100% (382/382), done.\u001b[K\n",
+ "remote: Total 502 (delta 205), reused 329 (delta 102), pack-reused 0 (from 0)\u001b[K\n",
+ "Receiving objects: 100% (502/502), 18.86 MiB | 21.56 MiB/s, done.\n",
+ "Resolving deltas: 100% (205/205), done.\n"
+ ]
+ }
+ ],
+ "source": [
+ "!git clone https://github.com/rzirvi1665/DiffStateGrad.git"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {
+ "id": "0YkGfhsIX7wz",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "43b1789a-61f5-45b6-857f-77c77a2bfdb4"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "/content/DiffStateGrad\n"
+ ]
+ }
+ ],
+ "source": [
+ "cd /content/DiffStateGrad"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {
+ "id": "gcyW21fxX-_J"
+ },
+ "outputs": [],
+ "source": [
+ "!mkdir -p models/ldm"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {
+ "id": "OS1g6QlIYHBl",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "17f5123c-3995-4e81-d466-9dde048c06a9"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "DEBUG output created by Wget 1.21.2 on linux-gnu.\n",
+ "\n",
+ "Reading HSTS entries from /root/.wget-hsts\n",
+ "URI encoding = ‘UTF-8’\n",
+ "Converted file name 'ffhq.zip' (UTF-8) -> 'ffhq.zip' (UTF-8)\n",
+ "--2026-02-01 09:53:52-- https://ommer-lab.com/files/latent-diffusion/ffhq.zip\n",
+ "Resolving ommer-lab.com (ommer-lab.com)... 141.84.41.65\n",
+ "Caching ommer-lab.com => 141.84.41.65\n",
+ "Connecting to ommer-lab.com (ommer-lab.com)|141.84.41.65|:443... connected.\n",
+ "Created socket 3.\n",
+ "Releasing 0x00005c9b3e555010 (new refcount 1).\n",
+ "Initiating SSL handshake.\n",
+ "Handshake successful; connected socket 3 to SSL handle 0x00005c9b3e556b10\n",
+ "certificate:\n",
+ " subject: CN=ommer-lab.com\n",
+ " issuer: CN=R12,O=Let's Encrypt,C=US\n",
+ "X509 certificate successfully verified and matches host ommer-lab.com\n",
+ "\n",
+ "---request begin---\n",
+ "GET /files/latent-diffusion/ffhq.zip HTTP/1.1\n",
+ "Host: ommer-lab.com\n",
+ "User-Agent: Wget/1.21.2\n",
+ "Accept: */*\n",
+ "Accept-Encoding: identity\n",
+ "Connection: Keep-Alive\n",
+ "\n",
+ "---request end---\n",
+ "HTTP request sent, awaiting response... \n",
+ "---response begin---\n",
+ "HTTP/1.1 200 OK\n",
+ "Date: Sun, 01 Feb 2026 09:53:52 GMT\n",
+ "Server: Apache/2.4.52 (Ubuntu)\n",
+ "Last-Modified: Mon, 21 Feb 2022 11:25:33 GMT\n",
+ "ETag: \"85777df6-5d8857da75fd3\"\n",
+ "Accept-Ranges: bytes\n",
+ "Content-Length: 2239200758\n",
+ "Keep-Alive: timeout=5, max=100\n",
+ "Connection: Keep-Alive\n",
+ "Content-Type: application/zip\n",
+ "\n",
+ "---response end---\n",
+ "200 OK\n",
+ "Registered socket 3 for persistent reuse.\n",
+ "Length: 2239200758 (2.1G) [application/zip]\n",
+ "Saving to: ‘./models/ldm/ffhq.zip’\n",
+ "\n",
+ "ffhq.zip 100%[===================>] 2.08G 22.1MB/s in 1m 58s \n",
+ "\n",
+ "2026-02-01 09:55:50 (18.1 MB/s) - ‘./models/ldm/ffhq.zip’ saved [2239200758/2239200758]\n",
+ "\n",
+ "URI encoding = ‘UTF-8’\n",
+ "Converted file name 'index.html' (UTF-8) -> 'index.html' (UTF-8)\n",
+ "--2026-02-01 09:55:50-- http://unzip/\n",
+ "Resolving unzip (unzip)... failed: Name or service not known.\n",
+ "wget: unable to resolve host address ‘unzip’\n",
+ "URI encoding = ‘UTF-8’\n",
+ "Converted file name 'ffhq.zip' (UTF-8) -> 'ffhq.zip' (UTF-8)\n",
+ "--2026-02-01 09:55:50-- http://models/ldm/ffhq.zip\n",
+ "Resolving models (models)... failed: Name or service not known.\n",
+ "wget: unable to resolve host address ‘models’\n",
+ "URI encoding = ‘UTF-8’\n",
+ "Converted file name 'ldm' (UTF-8) -> 'ldm' (UTF-8)\n",
+ "--2026-02-01 09:55:50-- http://./models/ldm\n",
+ "Resolving . (.)... failed: No address associated with hostname.\n",
+ "wget: unable to resolve host address ‘.’\n",
+ "FINISHED --2026-02-01 09:55:50--\n",
+ "Total wall clock time: 1m 59s\n",
+ "Downloaded: 1 files, 2.1G in 1m 58s (18.1 MB/s)\n"
+ ]
+ }
+ ],
+ "source": [
+    "!wget https://ommer-lab.com/files/latent-diffusion/ffhq.zip -P ./models/ldm && unzip models/ldm/ffhq.zip -d ./models/ldm"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {
+ "id": "OP3DAnw_arPQ",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "9b6772ce-4d15-4375-978e-2943cb7ed562"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Archive: models/ldm/ffhq.zip\n",
+ " inflating: ./models/ldm/model.ckpt \n"
+ ]
+ }
+ ],
+ "source": [
+ "!unzip models/ldm/ffhq.zip -d ./models/ldm"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {
+ "id": "6lfhOedYY4Cw",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "496a8a63-9e4b-4230-8d69-9fbb49b9a566"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "--2026-02-01 09:56:21-- https://ommer-lab.com/files/latent-diffusion/vq-f4.zip\n",
+ "Resolving ommer-lab.com (ommer-lab.com)... 141.84.41.65\n",
+ "Connecting to ommer-lab.com (ommer-lab.com)|141.84.41.65|:443... connected.\n",
+ "HTTP request sent, awaiting response... 200 OK\n",
+ "Length: 696655056 (664M) [application/zip]\n",
+ "Saving to: ‘./models/first_stage_models/vq-f4/vq-f4.zip’\n",
+ "\n",
+ "vq-f4.zip 100%[===================>] 664.38M 21.9MB/s in 38s \n",
+ "\n",
+ "2026-02-01 09:57:00 (17.4 MB/s) - ‘./models/first_stage_models/vq-f4/vq-f4.zip’ saved [696655056/696655056]\n",
+ "\n",
+ "Archive: models/first_stage_models/vq-f4/vq-f4.zip\n",
+ " inflating: ./models/first_stage_models/vq-f4/model.ckpt \n"
+ ]
+ }
+ ],
+ "source": [
+ "!mkdir -p models/first_stage_models/vq-f4\n",
+ "!wget https://ommer-lab.com/files/latent-diffusion/vq-f4.zip -P ./models/first_stage_models/vq-f4\n",
+ "!unzip models/first_stage_models/vq-f4/vq-f4.zip -d ./models/first_stage_models/vq-f4"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "2IuKi9__amlG"
+ },
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {
+ "id": "mCkKUFupZugA",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "6836aa22-f75e-4ab1-e4bb-39a511b5558a"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Cloning into 'bkse'...\n",
+ "remote: Enumerating objects: 577, done.\u001b[K\n",
+ "remote: Counting objects: 100% (577/577), done.\u001b[K\n",
+ "remote: Compressing objects: 100% (328/328), done.\u001b[K\n",
+ "remote: Total 577 (delta 334), reused 461 (delta 232), pack-reused 0 (from 0)\u001b[K\n",
+ "Receiving objects: 100% (577/577), 1.05 MiB | 4.42 MiB/s, done.\n",
+ "Resolving deltas: 100% (334/334), done.\n",
+ "Cloning into 'motionblur'...\n",
+ "remote: Enumerating objects: 36, done.\u001b[K\n",
+ "remote: Total 36 (delta 0), reused 0 (delta 0), pack-reused 36 (from 1)\u001b[K\n",
+ "Receiving objects: 100% (36/36), 511.08 KiB | 2.19 MiB/s, done.\n",
+ "Resolving deltas: 100% (12/12), done.\n"
+ ]
+ }
+ ],
+ "source": [
+ "!git clone https://github.com/VinAIResearch/blur-kernel-space-exploring bkse\n",
+ "\n",
+ "!git clone https://github.com/LeviBorodenko/motionblur motionblur"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "uXCv0ulNZysA"
+ },
+ "outputs": [],
+ "source": [
+    "# Install dependencies via:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "axQF9EtCZz_N"
+ },
+ "outputs": [],
+ "source": [
+ "!conda env create -f environment.yaml"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "E4Zj_xeBaOc6"
+ },
+ "outputs": [],
+ "source": [
+ "!pip install lpips"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "Xci7NlggaVg_"
+ },
+ "outputs": [],
+ "source": [
+ "!pip install pytorch_lightning"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "EjyXS3fLb5Q0"
+ },
+ "outputs": [],
+ "source": [
+ "!pip install \"pip<24.0\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "0P2Dvwk4cR2-"
+ },
+ "outputs": [],
+ "source": [
+ "!pip install pytorch-lightning==1.7.7"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "Eh62WLWlcvAY"
+ },
+ "outputs": [],
+ "source": [
+ "!pip install taming-transformers==0.0.1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "_ewDIMS8de7j"
+ },
+ "outputs": [],
+ "source": [
+ "!pip install taming-transformers==0.0.1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "s03oh4z0cjRs"
+ },
+ "outputs": [],
+ "source": [
+ "!pip install torchmetrics==0.9.3"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "h53FwWZ_aHrb",
+ "outputId": "c338b23b-3f83-4762-a314-7879618c00ec"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "2026-02-01 10:04:01.171669: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
+ "E0000 00:00:1769940241.191691 10657 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
+ "E0000 00:00:1769940241.197727 10657 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
+ "W0000 00:00:1769940241.213039 10657 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
+ "W0000 00:00:1769940241.213063 10657 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
+ "W0000 00:00:1769940241.213067 10657 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
+ "W0000 00:00:1769940241.213072 10657 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
+ "2026-02-01 10:04:01.217894: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
+ "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
+ "Global seed set to 42\n",
+ "Device set to cuda:0.\n",
+ "Loading model from models/ldm/model.ckpt\n",
+ "LatentDiffusion: Running in eps-prediction mode\n",
+ "DiffusionWrapper has 274.06 M params.\n",
+ "Keeping EMAs of 370.\n",
+ "making attention of type 'vanilla' with 512 in_channels\n",
+ "Working with z of shape (1, 3, 64, 64) = 12288 dimensions.\n",
+ "making attention of type 'vanilla' with 512 in_channels\n",
+ "Restored from models/first_stage_models/vq-f4/model.ckpt with 0 missing and 55 unexpected keys\n",
+ "Training LatentDiffusion as an unconditional model.\n",
+ "Operation: gaussian_blur / Noise: gaussian\n",
+ "Conditioning sampler : resample\n",
+ "Inference for image 60004\n",
+ "Data shape for DDIM sampling is (1, 3, 64, 64), eta 0.0\n",
+ "DDIM Sampler: 38% 189/500 [02:00<03:44, 1.39it/s]"
+ ]
+ }
+ ],
+ "source": [
+ "!python3 diffstategrad_sample_condition.py"
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "gpuType": "T4",
+ "provenance": [],
+ "authorship_tag": "ABX9TyOcfrP0A7K8uCc6Us91iHvx",
+ "include_colab_link": true
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
\ No newline at end of file
From fbe5806bd04f7eaede48ce5052e5215fb3fb7853 Mon Sep 17 00:00:00 2001
From: Georige <145737474+Georige@users.noreply.github.com>
Date: Sun, 1 Feb 2026 19:56:32 +0800
Subject: [PATCH 5/6] rsvd_block
---
adaptive_range_finder.py | 197 +++++++++++++++++++++++++++++++++++++++
randomized_svd.py | 162 ++++++++++++++++++++++++++++++++
2 files changed, 359 insertions(+)
create mode 100644 adaptive_range_finder.py
create mode 100644 randomized_svd.py
diff --git a/adaptive_range_finder.py b/adaptive_range_finder.py
new file mode 100644
index 0000000..eba56c1
--- /dev/null
+++ b/adaptive_range_finder.py
@@ -0,0 +1,197 @@
+import numpy as np
+import torch
+from typing import Union, List
+
+def adaptive_randomized_range_finder(
+ A: Union[np.ndarray, torch.Tensor],
+ epsilon: float,
+ r: int = 10
+) -> Union[np.ndarray, torch.Tensor]:
+ """
+ 实现算法 4.2: 自适应随机化 Range Finder (PyTorch/NumPy 通用版)。
+
+ 该函数计算矩阵 A 的正交基 Q,使得近似误差在概率上小于 epsilon。
+ 自动适配 CPU(NumPy) 或 GPU(PyTorch)。
+ """
+
+ # --- 1. 环境检测与适配 ---
+ is_torch = False
+ device = None
+ dtype = None
+
+ if isinstance(A, torch.Tensor):
+ is_torch = True
+ device = A.device
+ dtype = A.dtype
+ # 获取维度
+ m, n = A.shape
+ else:
+ # NumPy 模式
+ m, n = A.shape
+ dtype = A.dtype
+
+ # --- 2. 辅助函数 (屏蔽框架差异) ---
+ def make_random(shape):
+ if is_torch:
+ return torch.randn(shape, device=device, dtype=dtype)
+ else:
+ return np.random.normal(size=shape).astype(dtype)
+
+ def calc_norm(vec):
+ if is_torch:
+ return torch.norm(vec)
+ else:
+ return np.linalg.norm(vec)
+
+ def calc_dot(v1, v2):
+ if is_torch:
+ return torch.dot(v1, v2)
+ else:
+ return np.dot(v1, v2)
+
+ def mat_mul_vec(mat, vec):
+ # 矩阵乘向量
+ return mat @ vec
+
+ # --- 步骤 1: 初始化 ---
+ # Draw standard Gaussian vectors omega^(1)...omega^(r)
+ Omega = make_random((n, r))
+
+ # --- 步骤 2: 初始采样 ---
+ # Compute Y = A * Omega
+ # 注意:为了保持动态特性,我们用列表存储向量
+ Y = []
+ for i in range(r):
+ # 取出第 i 列
+ omega_col = Omega[:, i]
+ y_col = mat_mul_vec(A, omega_col)
+ Y.append(y_col)
+
+ # --- 步骤 3 & 4: 初始化循环变量 ---
+ j = 0
+ Q = [] # 存放正交基向量
+
+ # 计算阈值 limit
+ # np.sqrt(2 / np.pi) 约等于 0.798
+ const_factor = 0.79788456
+ limit = epsilon / (10 * const_factor)
+
+ # --- 步骤 5: While 循环 ---
+ # 只要前瞻窗口内的向量能量还很大,就继续寻找
+ while True:
+ # 检查是否越界 (防止极其罕见的无限循环)
+ if j >= n:
+ break
+
+ # 获取当前窗口内的向量 Y[j : j+r]
+ # 如果窗口超出了 Y 的当前长度,说明需要生成新的 (虽然后面的逻辑会生成,但这里做个防守)
+ current_window = Y[j : j+r]
+ if not current_window:
+ break
+
+ # 计算窗口内每个向量的范数
+ norms = [calc_norm(y).item() for y in current_window] # .item() 转为 python float 比较
+ max_norm = max(norms)
+
+ # 停止条件
+ if max_norm <= limit:
+ break
+
+ # --- 步骤 7: 投影 (Gram-Schmidt) ---
+ # 这里的 Y[j] 实际上已经被之前的 Q 正交化过了(在步骤13),
+ # 但为了数值稳定性,或者如果是第一轮,我们需要确保它正交。
+ y_current = Y[j]
+
+ # Double Orthogonalization (数值稳定性关键)
+ for _ in range(2): # 做两次以防万一,通常一次也够
+ for q_prev in Q:
+ projection = calc_dot(q_prev, y_current)
+ y_current = y_current - q_prev * projection
+
+ # --- 步骤 8: 归一化 ---
+ norm_y = calc_norm(y_current)
+
+ if norm_y < 1e-15:
+ # 线性相关,跳过
+ j += 1
+ continue
+
+ q_new = y_current / norm_y
+ Q.append(q_new)
+
+ # --- 步骤 10: 生成新的高斯向量 ---
+ omega_new = make_random((n,))
+
+ # --- 步骤 11: 计算新样本 ---
+ # y_new = (I - Q Q*) A omega_new
+ # 先算 A * omega
+ y_new = mat_mul_vec(A, omega_new)
+
+ # 立即对现有的 Q 进行正交化
+ for q in Q:
+ y_new = y_new - q * calc_dot(q, y_new)
+
+ Y.append(y_new)
+
+ # --- 步骤 12 & 13: 更新前瞻窗口内的向量 ---
+ # Y[i] = Y[i] - q_new *
+ # 范围: j+1 到 j+r (注意 Python切片是左闭右开,但这里不仅是切片,是由于 append 导致 len 增加)
+ # 我们只需要更新目前列表中位于 j 之后的所有向量
+ for i in range(j + 1, len(Y)):
+ proj = calc_dot(q_new, Y[i])
+ Y[i] = Y[i] - q_new * proj
+
+ j += 1
+
+ # --- 步骤 16: 构建最终矩阵 ---
+ if not Q:
+ # 返回空矩阵
+ if is_torch:
+ return torch.zeros((m, 0), device=device, dtype=dtype)
+ else:
+ return np.zeros((m, 0), dtype=dtype)
+
+ # 堆叠结果
+ if is_torch:
+ Q_matrix = torch.stack(Q, dim=1)
+ else:
+ Q_matrix = np.column_stack(Q)
+
+ return Q_matrix
+# --- 单元测试/用法示例 ---
+if __name__ == "__main__":
+ # 1. 创建一个具有特定秩的合成矩阵来测试
+ # 假设 m=1000, n=100, 真实秩=10
+ np.random.seed(42) # 固定随机种子以复现结果
+ m, n = 1000, 100
+ true_rank = 10
+
+ # 构造低秩矩阵 A = U * S * V.T
+ U_true, _ = np.linalg.qr(np.random.normal(size=(m, true_rank)))
+ V_true, _ = np.linalg.qr(np.random.normal(size=(n, true_rank)))
+ S_true = np.diag(np.linspace(10, 1, true_rank)) # 奇异值从 10 降到 1
+ A = U_true @ S_true @ V_true.T
+
+ print(f"原始矩阵形状: {A.shape}, 真实秩: {true_rank}")
+
+ # 2. 运行算法
+ target_epsilon = 1e-2
+ Q_approx = adaptive_randomized_range_finder(A, epsilon=target_epsilon)
+
+ # 3. 验证结果
+ found_rank = Q_approx.shape[1]
+ print(f"算法计算出的秩 (Q的列数): {found_rank}")
+
+ # 4. 验证近似误差 || (I - QQ*)A ||
+ # I - QQ* 是投影到 Q 正交补空间的算子
+ # 也就是 A 减去它在 Q 上的投影: A - Q(Q*A)
+ diff = A - Q_approx @ (Q_approx.T @ A)
+ error_norm = np.linalg.norm(diff, ord=2) # 谱范数
+
+ print(f"近似误差 (Spectral Norm): {error_norm:.6f}")
+ print(f"目标误差: {target_epsilon}")
+
+ if error_norm < target_epsilon * 10: # 允许一定的随机浮动
+ print(">> 测试通过:误差在可接受范围内。")
+ else:
+ print(">> 测试警告:误差偏大,请检查参数。")
\ No newline at end of file
diff --git a/randomized_svd.py b/randomized_svd.py
new file mode 100644
index 0000000..e44243e
--- /dev/null
+++ b/randomized_svd.py
@@ -0,0 +1,162 @@
+import numpy as np
+import torch
+from typing import Tuple, Union
+
+# --- 模块化导入 ---
+from adaptive_range_finder import adaptive_randomized_range_finder
+
+def _randomized_svd_2d_padded(
+ A: Union[np.ndarray, torch.Tensor],
+ epsilon: float
+) -> Tuple[Union[np.ndarray, torch.Tensor], ...]:
+ """
+ 内部函数:执行自适应 SVD,并支持 PyTorch/NumPy 自动切换和零填充。
+ """
+ # 1. 检测环境
+ is_torch = isinstance(A, torch.Tensor)
+
+ # 获取形状
+ m, n = A.shape
+ min_dim = min(m, n)
+
+ # 2. 自适应计算 Range (这一步会调用我们刚修好的 adaptive_range_finder)
+ # Q 的类型会和 A 保持一致 (GPU Tensor 或 NumPy)
+ Q = adaptive_randomized_range_finder(A, epsilon=epsilon)
+
+ # 计算 B = Q.T * A
+ # PyTorch 和 NumPy 都支持 @ 运算符
+ B = Q.T @ A
+
+ # 3. 对小矩阵 B 进行标准 SVD (区分框架)
+ if is_torch:
+ # PyTorch 路径
+ # S_hat: (k, k), Sigma: (k,), Vt: (k, n)
+ # 注意:torch.linalg.svd 返回的 U 对应这里的 S_hat
+ S_hat, Sigma_small, Vt_small = torch.linalg.svd(B, full_matrices=False)
+
+ # 还原 U_small = Q @ S_hat
+ U_small = Q @ S_hat
+
+ # 获取当前秩 k
+ k = Sigma_small.shape[0]
+
+ # --- Padding (PyTorch) ---
+ if k < min_dim:
+ # 补全 S
+ Sigma_final = torch.zeros(min_dim, dtype=A.dtype, device=A.device)
+ Sigma_final[:k] = Sigma_small
+
+ # 补全 U
+ U_final = torch.zeros((m, min_dim), dtype=A.dtype, device=A.device)
+ U_final[:, :k] = U_small
+
+ # 补全 Vt
+ Vt_final = torch.zeros((min_dim, n), dtype=A.dtype, device=A.device)
+ Vt_final[:k, :] = Vt_small
+
+ return U_final, Sigma_final, Vt_final
+ else:
+ # 截断(防止 k > min_dim 的浮点误差情况)
+ return U_small[:, :min_dim], Sigma_small[:min_dim], Vt_small[:min_dim, :]
+
+ else:
+ # NumPy 路径 (保持原有逻辑)
+ S_hat, Sigma_small, Vt_small = np.linalg.svd(B, full_matrices=False)
+ U_small = Q @ S_hat
+ k = Sigma_small.shape[0]
+
+ if k < min_dim:
+ Sigma_final = np.zeros(min_dim, dtype=A.dtype)
+ Sigma_final[:k] = Sigma_small
+
+ U_final = np.zeros((m, min_dim), dtype=A.dtype)
+ U_final[:, :k] = U_small
+
+ Vt_final = np.zeros((min_dim, n), dtype=A.dtype)
+ Vt_final[:k, :] = Vt_small
+ return U_final, Sigma_final, Vt_final
+ else:
+ return U_small[:, :min_dim], Sigma_small[:min_dim], Vt_small[:min_dim, :]
+
+def randomized_svd(
+ data: Union[np.ndarray, torch.Tensor],
+ epsilon: float = 1e-2
+) -> Tuple[Union[np.ndarray, torch.Tensor], ...]:
+ """
+ 实现算法 5.1: 逐通道随机化 SVD (支持 Batch/Channel-wise)。
+ 完全兼容 PyTorch GPU Tensor 流水线,无需 CPU 转换。
+
+ 输出维度 (假设输入 3, 64, 64):
+ U: (3, 64, 64)
+ S: (3, 64) (零填充对齐)
+ Vh: (3, 64, 64)
+ """
+
+ # 1. 基础信息获取
+ is_torch = isinstance(data, torch.Tensor)
+ input_shape = data.shape
+
+ # 2. 逐通道处理逻辑
+ if len(input_shape) == 3:
+ # (C, H, W) 模式
+ C, H, W = input_shape
+ min_dim = min(H, W)
+
+ # 准备容器
+ if is_torch:
+ # 直接在 GPU 上分配内存
+ U_batch = torch.zeros((C, H, min_dim), dtype=data.dtype, device=data.device)
+ S_batch = torch.zeros((C, min_dim), dtype=data.dtype, device=data.device)
+ Vt_batch = torch.zeros((C, min_dim, W), dtype=data.dtype, device=data.device)
+ else:
+ U_batch = np.zeros((C, H, min_dim), dtype=data.dtype)
+ S_batch = np.zeros((C, min_dim), dtype=data.dtype)
+ Vt_batch = np.zeros((C, min_dim, W), dtype=data.dtype)
+
+ for i in range(C):
+ # 取出单个通道 (保持 Tensor 属性)
+ # data[i] 依然是 GPU tensor
+ u, s, vt = _randomized_svd_2d_padded(data[i], epsilon)
+
+ U_batch[i] = u
+ S_batch[i] = s
+ Vt_batch[i] = vt
+
+ return U_batch, S_batch, Vt_batch
+
+ elif len(input_shape) == 2:
+ # 2D 模式直接调用
+ return _randomized_svd_2d_padded(data, epsilon)
+
+ else:
+ raise ValueError(f"仅支持 2D 或 3D 输入,当前形状: {input_shape}")
+
+# --- 验证代码 (确保 GPU 流程通畅) ---
+if __name__ == "__main__":
+ if torch.cuda.is_available():
+ print("正在测试 CUDA GPU 模式...")
+ device = "cuda:0"
+
+ # 1. 创建 GPU 数据 (3, 64, 64)
+ # 模拟真实秩 rank=10
+ rank = 10
+ U = torch.randn(3, 64, rank, device=device)
+ S = torch.randn(3, rank, device=device)
+ V = torch.randn(3, rank, 64, device=device)
+ z_t = U @ torch.diag_embed(S) @ V
+
+ print(f"输入数据位于: {z_t.device}")
+
+ # 2. 运行算法
+ # 期望:没有任何报错,且输出依然在 GPU 上
+ U_out, S_out, Vh_out = randomized_svd(z_t, epsilon=1e-2)
+
+ print(f"输出 U 位于: {U_out.device}")
+ print(f"输出形状: {U_out.shape}, {S_out.shape}, {Vh_out.shape}")
+
+ if U_out.is_cuda:
+ print("✅ 测试通过:全链路 GPU 计算成功!")
+ else:
+ print("❌ 测试失败:数据回落到了 CPU。")
+ else:
+ print("未检测到 GPU,跳过 GPU 测试。")
\ No newline at end of file
From eb54163884cefb913cabbbee1881e9318db69689 Mon Sep 17 00:00:00 2001
From: Georige <145737474+Georige@users.noreply.github.com>
Date: Sun, 1 Feb 2026 20:08:28 +0800
Subject: [PATCH 6/6] add time recording and rsvd
---
ldm/models/diffusion/diffstategrad_ddim.py | 25 +++++++++++++++++++---
1 file changed, 22 insertions(+), 3 deletions(-)
diff --git a/ldm/models/diffusion/diffstategrad_ddim.py b/ldm/models/diffusion/diffstategrad_ddim.py
index 3170ba5..0750910 100644
--- a/ldm/models/diffusion/diffstategrad_ddim.py
+++ b/ldm/models/diffusion/diffstategrad_ddim.py
@@ -31,7 +31,7 @@ def compute_rank_for_explained_variance(singular_values, explained_variance_cuto
rank = np.searchsorted(cumulative_variance, explained_variance_cutoff) + 1
total_rank += rank
return int(total_rank / 3)
-
+import time
def compute_svd_and_adaptive_rank(z_t, var_cutoff):
"""
Compute SVD and adaptive rank for the input tensor.
@@ -44,14 +44,33 @@ def compute_svd_and_adaptive_rank(z_t, var_cutoff):
tuple: (U, s, Vh, adaptive_rank) where U, s, Vh are SVD components
and adaptive_rank is the computed rank
"""
- # Compute SVD of current image representation
+
+ # 1. begin
+ start_time = time.perf_counter()
+
+ # 2. run
U, s, Vh = torch.linalg.svd(z_t[0], full_matrices=False)
+ # 3. end
+ end_time = time.perf_counter()
+
+ # 4. minus
+ time1 = end_time - start_time
+
+
+ # Compute SVD of current image representation
+ # implement of rSVD
+ start_time = time.perf_counter()
+    from randomized_svd import randomized_svd; U, sb, Vh = randomized_svd(z_t[0], epsilon=0.1)
+ end_time = time.perf_counter()
+ time2 = end_time - start_time
+    execution_time = time1 - time2
+    print(f"relative time(svd-rsvd): {execution_time:.6f} 秒")
# Compute adaptive rank
s_numpy = s.detach().cpu().numpy()
adaptive_rank = compute_rank_for_explained_variance([s_numpy], var_cutoff)
-
+ print("rank:",adaptive_rank)
return U, s, Vh, adaptive_rank
def apply_diffstategrad(norm_grad, iteration_count, period, U=None, s=None, Vh=None, adaptive_rank=None):