ZeqiangLai commited on
Commit
5ae5c5c
·
1 Parent(s): 5febeab
.gitignore ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
159
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
+ #.idea/
drag_gan.py CHANGED
@@ -136,20 +136,21 @@ def bilinear_interpolate_torch(im, y, x):
136
  y : 1,numPoints -- pixel location y float
137
  x : 1,numPOints -- pixel location y float
138
  """
139
-
140
- x0 = torch.floor(x).long()
 
141
  x1 = x0 + 1
142
 
143
- y0 = torch.floor(y).long()
144
  y1 = y0 + 1
145
 
146
- wa = (x1.float() - x) * (y1.float() - y)
147
- wb = (x1.float() - x) * (y - y0.float())
148
- wc = (x - x0.float()) * (y1.float() - y)
149
- wd = (x - x0.float()) * (y - y0.float())
150
  # Instead of clamp
151
- x1 = x1 - torch.floor(x1 / im.shape[3]).int()
152
- y1 = y1 - torch.floor(y1 / im.shape[2]).int()
153
  Ia = im[:, :, y0, x0]
154
  Ib = im[:, :, y1, x0]
155
  Ic = im[:, :, y0, x1]
@@ -194,7 +195,8 @@ def drag_gan(g_ema, latent: torch.Tensor, noise, F, handle_points, target_points
194
  f2 = bilinear_interpolate_torch(F2, qi[0] + di[0], qi[1] + di[1])
195
  loss += FF.l1_loss(f2, f1)
196
 
197
- loss += ((F2 - F0) * (1 - mask)).abs().mean() * lam
 
198
 
199
  loss.backward()
200
  optimizer.step()
 
136
  y : 1,numPoints -- pixel location y float
137
  x : 1,numPOints -- pixel location y float
138
  """
139
+ device = im.device
140
+
141
+ x0 = torch.floor(x).long().to(device)
142
  x1 = x0 + 1
143
 
144
+ y0 = torch.floor(y).long().to(device)
145
  y1 = y0 + 1
146
 
147
+ wa = ((x1.float() - x) * (y1.float() - y)).to(device)
148
+ wb = ((x1.float() - x) * (y - y0.float())).to(device)
149
+ wc = ((x - x0.float()) * (y1.float() - y)).to(device)
150
+ wd = ((x - x0.float()) * (y - y0.float())).to(device)
151
  # Instead of clamp
152
+ x1 = x1 - torch.floor(x1 / im.shape[3]).int().to(device)
153
+ y1 = y1 - torch.floor(y1 / im.shape[2]).int().to(device)
154
  Ia = im[:, :, y0, x0]
155
  Ib = im[:, :, y1, x0]
156
  Ic = im[:, :, y0, x1]
 
195
  f2 = bilinear_interpolate_torch(F2, qi[0] + di[0], qi[1] + di[1])
196
  loss += FF.l1_loss(f2, f1)
197
 
198
+ if mask is not None:
199
+ loss += ((F2 - F0) * (1 - mask)).abs().mean() * lam
200
 
201
  loss.backward()
202
  optimizer.step()
gradio_app.py CHANGED
@@ -7,12 +7,14 @@ from PIL import Image
7
  import uuid
8
 
9
  from drag_gan import drag_gan, stylegan2
 
10
 
11
- device = 'cuda'
12
 
13
 
14
  SIZE_TO_CLICK_SIZE = {
15
- 1024: 5,
 
16
  256: 2
17
  }
18
 
@@ -21,8 +23,32 @@ CKPT_SIZE = {
21
  'stylegan2-cat-config-f.pt': 256,
22
  'stylegan2-church-config-f.pt': 256,
23
  'stylegan2-horse-config-f.pt': 256,
 
 
 
 
 
 
24
  }
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
  class ImageMask(gr.components.Image):
28
  """
@@ -94,11 +120,14 @@ def on_drag(model, points, max_iters, state, size, mask):
94
  handle_points = [torch.tensor(p).float() for p in points['handle']]
95
  target_points = [torch.tensor(p).float() for p in points['target']]
96
 
97
- mask = Image.fromarray(mask['mask']).convert('L')
98
- mask = np.array(mask) == 255
 
99
 
100
- mask = torch.from_numpy(mask).float().to(device)
101
- mask = mask.unsqueeze(0).unsqueeze(0)
 
 
102
 
103
  step = 0
104
  for sample2, latent, F, handle_points in drag_gan(model.g_ema, latent, noise, F,
@@ -149,7 +178,7 @@ def on_change_model(selected, model):
149
  'sample': sample,
150
  'history': []
151
  }
152
- return model, state, to_image(sample), size
153
 
154
 
155
  def on_new_image(model):
@@ -187,11 +216,29 @@ def on_show_save():
187
  return gr.update(visible=True)
188
 
189
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  def main():
191
  torch.cuda.manual_seed(25)
192
 
193
  with gr.Blocks() as demo:
194
- wrapped_model = ModelWrapper()
195
  model = gr.State(wrapped_model)
196
  sample_z = torch.randn([1, 512], device=device)
197
  latent, noise = wrapped_model.g_ema.prepare([sample_z])
@@ -199,11 +246,11 @@ def main():
199
 
200
  gr.Markdown(
201
  """
202
- # DragGAN (Unofficial)
203
 
204
  Unofficial implementation of [Drag Your GAN: Interactive Point-based Manipulation on the Generative Image Manifold](https://vcai.mpi-inf.mpg.de/projects/DragGAN/)
205
 
206
- [Github](https://github.com/Zeqiang-Lai/DragGAN) | [Official Implementation](https://github.com/XingangPan/DragGAN) (Not released yet)
207
 
208
  ## Tutorial
209
 
@@ -211,6 +258,22 @@ def main():
211
  2. Setup a least one pair of handle point and target point.
212
  3. Click "Drag it".
213
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
  """,
215
  )
216
  state = gr.State({
@@ -221,12 +284,12 @@ def main():
221
  'history': []
222
  })
223
  points = gr.State({'target': [], 'handle': []})
224
- size = gr.State(1024)
225
 
226
  with gr.Row():
227
  with gr.Column(scale=0.3):
228
  with gr.Accordion("Model"):
229
- model_dropdown = gr.Dropdown(choices=list(CKPT_SIZE.keys()), value='stylegan2-ffhq-config-f.pt',
230
  label='StyleGAN2 model')
231
  max_iters = gr.Slider(1, 500, 20, step=1, label='Max Iterations')
232
  new_btn = gr.Button('New Image')
@@ -252,24 +315,34 @@ def main():
252
  with gr.Column():
253
  with gr.Tabs():
254
  with gr.Tab('Draw a Mask', id='mask'):
255
- mask = gr.ImageMask(value=to_image(sample), label='Mask').style(height=768, width=768)
256
  with gr.Tab('Setup Handle Points', id='input'):
257
- image = gr.Image(to_image(sample)).style(height=768, width=768)
258
 
259
  image.select(on_click, [image, target_point, points, size], [image, text, target_point])
 
 
260
  btn.click(on_drag, inputs=[model, points, max_iters, state, size, mask], outputs=[image, state, progress]).then(
261
  on_show_save, outputs=save_panel).then(
262
  on_save_files, inputs=[image, state], outputs=[files]
263
  )
264
  reset_btn.click(on_reset, inputs=[points, image, state], outputs=[points, image])
265
  undo_btn.click(on_undo, inputs=[points, image, state, size], outputs=[points, image])
266
- model_dropdown.change(on_change_model, inputs=[model_dropdown, model], outputs=[model, state, image, size])
267
  new_btn.click(on_new_image, inputs=[model], outputs=[image, mask, state, points, target_point])
268
  max_iters.change(on_max_iter_change, inputs=max_iters, outputs=progress)
269
  return demo
270
 
271
 
272
  if __name__ == '__main__':
273
- import fire
 
 
 
 
 
 
 
274
  demo = main()
275
- fire.Fire(demo.queue(concurrency_count=1, max_size=20).launch)
 
 
7
  import uuid
8
 
9
  from drag_gan import drag_gan, stylegan2
10
+ from stylegan2.inversion import inverse_image
11
 
12
+ device = 'cpu'
13
 
14
 
15
  SIZE_TO_CLICK_SIZE = {
16
+ 1024: 8,
17
+ 512: 5,
18
  256: 2
19
  }
20
 
 
23
  'stylegan2-cat-config-f.pt': 256,
24
  'stylegan2-church-config-f.pt': 256,
25
  'stylegan2-horse-config-f.pt': 256,
26
+ 'ada/ffhq.pt': 1024,
27
+ 'ada/afhqcat.pt': 512,
28
+ 'ada/afhqdog.pt': 512,
29
+ 'ada/afhqwild.pt': 512,
30
+ 'ada/brecahad.pt': 512,
31
+ 'ada/metfaces.pt': 512,
32
  }
33
 
34
+ DEFAULT_CKPT = 'stylegan2-ffhq-config-f.pt'
35
+
36
+
37
+ class grImage(gr.components.Image):
38
+ is_template = True
39
+
40
+ def preprocess(self, x):
41
+ if x is None:
42
+ return x
43
+ if self.tool == "sketch" and self.source in ["upload", "webcam"]:
44
+ decode_image = gr.processing_utils.decode_base64_to_image(x)
45
+ width, height = decode_image.size
46
+ mask = np.zeros((height, width, 4), dtype=np.uint8)
47
+ mask[..., -1] = 255
48
+ mask = self.postprocess(mask)
49
+ x = {'image': x, 'mask': mask}
50
+ return super().preprocess(x)
51
+
52
 
53
  class ImageMask(gr.components.Image):
54
  """
 
120
  handle_points = [torch.tensor(p).float() for p in points['handle']]
121
  target_points = [torch.tensor(p).float() for p in points['target']]
122
 
123
+ if mask.get('mask') is not None:
124
+ mask = Image.fromarray(mask['mask']).convert('L')
125
+ mask = np.array(mask) == 255
126
 
127
+ mask = torch.from_numpy(mask).float().to(device)
128
+ mask = mask.unsqueeze(0).unsqueeze(0)
129
+ else:
130
+ mask = None
131
 
132
  step = 0
133
  for sample2, latent, F, handle_points in drag_gan(model.g_ema, latent, noise, F,
 
178
  'sample': sample,
179
  'history': []
180
  }
181
+ return model, state, to_image(sample), to_image(sample), size
182
 
183
 
184
  def on_new_image(model):
 
216
  return gr.update(visible=True)
217
 
218
 
219
+ def on_image_change(model, image_size, image):
220
+ image = Image.fromarray(image)
221
+ result = inverse_image(
222
+ model.g_ema,
223
+ image,
224
+ image_size=image_size
225
+ )
226
+ result['history'] = []
227
+ image = to_image(result['sample'])
228
+ points = {'target': [], 'handle': []}
229
+ target_point = False
230
+ return image, image, result, points, target_point
231
+
232
+
233
+ def on_mask_change(mask):
234
+ return mask['image']
235
+
236
+
237
  def main():
238
  torch.cuda.manual_seed(25)
239
 
240
  with gr.Blocks() as demo:
241
+ wrapped_model = ModelWrapper(ckpt=DEFAULT_CKPT, size=CKPT_SIZE[DEFAULT_CKPT])
242
  model = gr.State(wrapped_model)
243
  sample_z = torch.randn([1, 512], device=device)
244
  latent, noise = wrapped_model.g_ema.prepare([sample_z])
 
246
 
247
  gr.Markdown(
248
  """
249
+ # DragGAN
250
 
251
  Unofficial implementation of [Drag Your GAN: Interactive Point-based Manipulation on the Generative Image Manifold](https://vcai.mpi-inf.mpg.de/projects/DragGAN/)
252
 
253
+ [Our Implementation](https://github.com/Zeqiang-Lai/DragGAN) | [Official Implementation](https://github.com/XingangPan/DragGAN) (Not released yet)
254
 
255
  ## Tutorial
256
 
 
258
  2. Setup a least one pair of handle point and target point.
259
  3. Click "Drag it".
260
 
261
+ ## Hints
262
+
263
+ - Handle points (Blue): the point you want to drag.
264
+ - Target points (Red): the destination you want to drag towards to.
265
+
266
+ ## Primary Support of Custom Image.
267
+
268
+ - We now support dragging user uploaded image by GAN inversion.
269
+ - **Please upload your image at `Setup Handle Points` pannel.** Upload it from `Draw a Mask` would cause errors for now.
270
+ - Due to the limitation of GAN inversion,
271
+ - You might wait roughly 1 minute to see the GAN version of the uploaded image.
272
+ - The shown image might be slightly difference from the uploaded one.
273
+ - It could also fail to invert the uploaded image and generate very poor results.
274
+ - Idealy, you should choose the closest model of the uploaded image. For example, choose `stylegan2-ffhq-config-f.pt` for human face. `stylegan2-cat-config-f.pt` for cat.
275
+
276
+ > Please fire an issue if you have encounted any problem. Also don't forgot to give a star to the [Official Repo](https://github.com/XingangPan/DragGAN), [our project](https://github.com/Zeqiang-Lai/DragGAN) could not exist without it.
277
  """,
278
  )
279
  state = gr.State({
 
284
  'history': []
285
  })
286
  points = gr.State({'target': [], 'handle': []})
287
+ size = gr.State(CKPT_SIZE[DEFAULT_CKPT])
288
 
289
  with gr.Row():
290
  with gr.Column(scale=0.3):
291
  with gr.Accordion("Model"):
292
+ model_dropdown = gr.Dropdown(choices=list(CKPT_SIZE.keys()), value=DEFAULT_CKPT,
293
  label='StyleGAN2 model')
294
  max_iters = gr.Slider(1, 500, 20, step=1, label='Max Iterations')
295
  new_btn = gr.Button('New Image')
 
315
  with gr.Column():
316
  with gr.Tabs():
317
  with gr.Tab('Draw a Mask', id='mask'):
318
+ mask = ImageMask(value=to_image(sample), label='Mask').style(height=768, width=768)
319
  with gr.Tab('Setup Handle Points', id='input'):
320
+ image = grImage(to_image(sample)).style(height=768, width=768)
321
 
322
  image.select(on_click, [image, target_point, points, size], [image, text, target_point])
323
+ image.upload(on_image_change, [model, size, image], [image, mask, state, points, target_point])
324
+ mask.upload(on_mask_change, [mask], [image])
325
  btn.click(on_drag, inputs=[model, points, max_iters, state, size, mask], outputs=[image, state, progress]).then(
326
  on_show_save, outputs=save_panel).then(
327
  on_save_files, inputs=[image, state], outputs=[files]
328
  )
329
  reset_btn.click(on_reset, inputs=[points, image, state], outputs=[points, image])
330
  undo_btn.click(on_undo, inputs=[points, image, state, size], outputs=[points, image])
331
+ model_dropdown.change(on_change_model, inputs=[model_dropdown, model], outputs=[model, state, image, mask, size])
332
  new_btn.click(on_new_image, inputs=[model], outputs=[image, mask, state, points, target_point])
333
  max_iters.change(on_max_iter_change, inputs=max_iters, outputs=progress)
334
  return demo
335
 
336
 
337
  if __name__ == '__main__':
338
+ import argparse
339
+ parser = argparse.ArgumentParser()
340
+ parser.add_argument('--device', default='cuda')
341
+ parser.add_argument('--share', action='store_true')
342
+ parser.add_argument('-p', '--port', default=None)
343
+ parser.add_argument('--ip', default=None)
344
+ args = parser.parse_args()
345
+ device = args.device
346
  demo = main()
347
+ print('Successfully loaded, starting gradio demo')
348
+ demo.queue(concurrency_count=1, max_size=20).launch(share=args.share, server_name=args.ip, server_port=args.port)
requirements.txt CHANGED
@@ -5,4 +5,5 @@ numpy
5
  ninja
6
  fire
7
  imageio
8
- torchvision
 
 
5
  ninja
6
  fire
7
  imageio
8
+ torchvision
9
+ IPython
stylegan2/{_init__.py → __init__.py} RENAMED
File without changes
stylegan2/inversion.py ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+ import os
3
+
4
+ import torch
5
+ from torch import optim
6
+ from torch.nn import functional as FF
7
+ from torchvision import transforms
8
+ from PIL import Image
9
+ from tqdm import tqdm
10
+ import dataclasses
11
+
12
+ from .lpips import util
13
+
14
+
15
+ def noise_regularize(noises):
16
+ loss = 0
17
+
18
+ for noise in noises:
19
+ size = noise.shape[2]
20
+
21
+ while True:
22
+ loss = (
23
+ loss
24
+ + (noise * torch.roll(noise, shifts=1, dims=3)).mean().pow(2)
25
+ + (noise * torch.roll(noise, shifts=1, dims=2)).mean().pow(2)
26
+ )
27
+
28
+ if size <= 8:
29
+ break
30
+
31
+ noise = noise.reshape([-1, 1, size // 2, 2, size // 2, 2])
32
+ noise = noise.mean([3, 5])
33
+ size //= 2
34
+
35
+ return loss
36
+
37
+
38
+ def noise_normalize_(noises):
39
+ for noise in noises:
40
+ mean = noise.mean()
41
+ std = noise.std()
42
+
43
+ noise.data.add_(-mean).div_(std)
44
+
45
+
46
+ def get_lr(t, initial_lr, rampdown=0.25, rampup=0.05):
47
+ lr_ramp = min(1, (1 - t) / rampdown)
48
+ lr_ramp = 0.5 - 0.5 * math.cos(lr_ramp * math.pi)
49
+ lr_ramp = lr_ramp * min(1, t / rampup)
50
+
51
+ return initial_lr * lr_ramp
52
+
53
+
54
+ def latent_noise(latent, strength):
55
+ noise = torch.randn_like(latent) * strength
56
+
57
+ return latent + noise
58
+
59
+
60
+ def make_image(tensor):
61
+ return (
62
+ tensor.detach()
63
+ .clamp_(min=-1, max=1)
64
+ .add(1)
65
+ .div_(2)
66
+ .mul(255)
67
+ .type(torch.uint8)
68
+ .permute(0, 2, 3, 1)
69
+ .to("cpu")
70
+ .numpy()
71
+ )
72
+
73
+
74
+ @dataclasses.dataclass
75
+ class InverseConfig:
76
+ lr_warmup = 0.05
77
+ lr_decay = 0.25
78
+ lr = 0.1
79
+ noise = 0.05
80
+ noise_decay = 0.75
81
+ step = 1000
82
+ noise_regularize = 1e5
83
+ mse = 0
84
+ w_plus = False,
85
+
86
+
87
+ def inverse_image(
88
+ g_ema,
89
+ image,
90
+ image_size=256,
91
+ config=InverseConfig()
92
+ ):
93
+ device = "cuda"
94
+ args = config
95
+
96
+ n_mean_latent = 10000
97
+
98
+ resize = min(image_size, 256)
99
+
100
+ transform = transforms.Compose(
101
+ [
102
+ transforms.Resize(resize),
103
+ transforms.CenterCrop(resize),
104
+ transforms.ToTensor(),
105
+ transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
106
+ ]
107
+ )
108
+
109
+ imgs = []
110
+ img = transform(image)
111
+ imgs.append(img)
112
+
113
+ imgs = torch.stack(imgs, 0).to(device)
114
+
115
+ with torch.no_grad():
116
+ noise_sample = torch.randn(n_mean_latent, 512, device=device)
117
+ latent_out = g_ema.style(noise_sample)
118
+
119
+ latent_mean = latent_out.mean(0)
120
+ latent_std = ((latent_out - latent_mean).pow(2).sum() / n_mean_latent) ** 0.5
121
+
122
+ percept = util.PerceptualLoss(
123
+ model="net-lin", net="vgg", use_gpu=device.startswith("cuda")
124
+ )
125
+
126
+ noises_single = g_ema.make_noise()
127
+ noises = []
128
+ for noise in noises_single:
129
+ noises.append(noise.repeat(imgs.shape[0], 1, 1, 1).normal_())
130
+
131
+ latent_in = latent_mean.detach().clone().unsqueeze(0).repeat(imgs.shape[0], 1)
132
+
133
+ if args.w_plus:
134
+ latent_in = latent_in.unsqueeze(1).repeat(1, g_ema.n_latent, 1)
135
+
136
+ latent_in.requires_grad = True
137
+
138
+ for noise in noises:
139
+ noise.requires_grad = True
140
+
141
+ optimizer = optim.Adam([latent_in] + noises, lr=args.lr)
142
+
143
+ pbar = tqdm(range(args.step))
144
+ latent_path = []
145
+
146
+ for i in pbar:
147
+ t = i / args.step
148
+ lr = get_lr(t, args.lr)
149
+ optimizer.param_groups[0]["lr"] = lr
150
+ noise_strength = latent_std * args.noise * max(0, 1 - t / args.noise_decay) ** 2
151
+ latent_n = latent_noise(latent_in, noise_strength.item())
152
+
153
+ latent, noise = g_ema.prepare([latent_n], input_is_latent=True, noise=noises)
154
+ img_gen, F = g_ema.generate(latent, noise)
155
+
156
+ batch, channel, height, width = img_gen.shape
157
+
158
+ if height > 256:
159
+ factor = height // 256
160
+
161
+ img_gen = img_gen.reshape(
162
+ batch, channel, height // factor, factor, width // factor, factor
163
+ )
164
+ img_gen = img_gen.mean([3, 5])
165
+
166
+ p_loss = percept(img_gen, imgs).sum()
167
+ n_loss = noise_regularize(noises)
168
+ mse_loss = FF.mse_loss(img_gen, imgs)
169
+
170
+ loss = p_loss + args.noise_regularize * n_loss + args.mse * mse_loss
171
+
172
+ optimizer.zero_grad()
173
+ loss.backward()
174
+ optimizer.step()
175
+
176
+ noise_normalize_(noises)
177
+
178
+ if (i + 1) % 100 == 0:
179
+ latent_path.append(latent_in.detach().clone())
180
+
181
+ pbar.set_description(
182
+ (
183
+ f"perceptual: {p_loss.item():.4f}; noise regularize: {n_loss.item():.4f};"
184
+ f" mse: {mse_loss.item():.4f}; lr: {lr:.4f}"
185
+ )
186
+ )
187
+
188
+ latent, noise = g_ema.prepare([latent_path[-1]], input_is_latent=True, noise=noises)
189
+ img_gen, F = g_ema.generate(latent, noise)
190
+
191
+ img_ar = make_image(img_gen)
192
+
193
+ i = 0
194
+
195
+ noise_single = []
196
+ for noise in noises:
197
+ noise_single.append(noise[i: i + 1])
198
+
199
+ result = {
200
+ "latent": latent,
201
+ "noise": noise_single,
202
+ 'F': F,
203
+ "sample": img_gen,
204
+ }
205
+
206
+ pil_img = Image.fromarray(img_ar[i])
207
+ pil_img.save('project.png')
208
+
209
+ return result
stylegan2/lpips/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+
2
+ from __future__ import absolute_import
3
+ from __future__ import division
4
+ from __future__ import print_function
5
+
stylegan2/lpips/base_model.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ import torch
4
+ from torch.autograd import Variable
5
+ from pdb import set_trace as st
6
+ from IPython import embed
7
+
8
+ class BaseModel():
9
+ def __init__(self):
10
+ pass;
11
+
12
+ def name(self):
13
+ return 'BaseModel'
14
+
15
+ def initialize(self, use_gpu=True, gpu_ids=[0]):
16
+ self.use_gpu = use_gpu
17
+ self.gpu_ids = gpu_ids
18
+
19
+ def forward(self):
20
+ pass
21
+
22
+ def get_image_paths(self):
23
+ pass
24
+
25
+ def optimize_parameters(self):
26
+ pass
27
+
28
+ def get_current_visuals(self):
29
+ return self.input
30
+
31
+ def get_current_errors(self):
32
+ return {}
33
+
34
+ def save(self, label):
35
+ pass
36
+
37
+ # helper saving function that can be used by subclasses
38
+ def save_network(self, network, path, network_label, epoch_label):
39
+ save_filename = '%s_net_%s.pth' % (epoch_label, network_label)
40
+ save_path = os.path.join(path, save_filename)
41
+ torch.save(network.state_dict(), save_path)
42
+
43
+ # helper loading function that can be used by subclasses
44
+ def load_network(self, network, network_label, epoch_label):
45
+ save_filename = '%s_net_%s.pth' % (epoch_label, network_label)
46
+ save_path = os.path.join(self.save_dir, save_filename)
47
+ print('Loading network from %s'%save_path)
48
+ network.load_state_dict(torch.load(save_path))
49
+
50
+ def update_learning_rate():
51
+ pass
52
+
53
+ def get_image_paths(self):
54
+ return self.image_paths
55
+
56
+ def save_done(self, flag=False):
57
+ np.save(os.path.join(self.save_dir, 'done_flag'),flag)
58
+ np.savetxt(os.path.join(self.save_dir, 'done_flag'),[flag,],fmt='%i')
stylegan2/lpips/dist_model.py ADDED
@@ -0,0 +1,314 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from __future__ import absolute_import
3
+
4
+ import sys
5
+ import numpy as np
6
+ import torch
7
+ from torch import nn
8
+ import os
9
+ from collections import OrderedDict
10
+ from torch.autograd import Variable
11
+ import itertools
12
+ from .base_model import BaseModel
13
+ from scipy.ndimage import zoom
14
+ import fractions
15
+ import functools
16
+ import skimage.transform
17
+ from tqdm import tqdm
18
+ import urllib
19
+
20
+ from IPython import embed
21
+
22
+ from . import networks_basic as networks
23
+ from . import util
24
+
25
+
26
+ class DownloadProgressBar(tqdm):
27
+ def update_to(self, b=1, bsize=1, tsize=None):
28
+ if tsize is not None:
29
+ self.total = tsize
30
+ self.update(b * bsize - self.n)
31
+
32
+
33
+ def get_path(base_path):
34
+ BASE_DIR = os.path.join('checkpoints')
35
+
36
+ save_path = os.path.join(BASE_DIR, base_path)
37
+ if not os.path.exists(save_path):
38
+ url = f"https://huggingface.co/aaronb/StyleGAN2/resolve/main/{base_path}"
39
+ print(f'{base_path} not found')
40
+ print('Try to download from huggingface: ', url)
41
+ os.makedirs(os.path.dirname(save_path), exist_ok=True)
42
+ download_url(url, save_path)
43
+ print('Downloaded to ', save_path)
44
+ return save_path
45
+
46
+
47
+ def download_url(url, output_path):
48
+ with DownloadProgressBar(unit='B', unit_scale=True,
49
+ miniters=1, desc=url.split('/')[-1]) as t:
50
+ urllib.request.urlretrieve(url, filename=output_path, reporthook=t.update_to)
51
+
52
+
53
+ class DistModel(BaseModel):
54
+ def name(self):
55
+ return self.model_name
56
+
57
+ def initialize(self, model='net-lin', net='alex', colorspace='Lab', pnet_rand=False, pnet_tune=False, model_path=None,
58
+ use_gpu=True, printNet=False, spatial=False,
59
+ is_train=False, lr=.0001, beta1=0.5, version='0.1', gpu_ids=[0]):
60
+ '''
61
+ INPUTS
62
+ model - ['net-lin'] for linearly calibrated network
63
+ ['net'] for off-the-shelf network
64
+ ['L2'] for L2 distance in Lab colorspace
65
+ ['SSIM'] for ssim in RGB colorspace
66
+ net - ['squeeze','alex','vgg']
67
+ model_path - if None, will look in weights/[NET_NAME].pth
68
+ colorspace - ['Lab','RGB'] colorspace to use for L2 and SSIM
69
+ use_gpu - bool - whether or not to use a GPU
70
+ printNet - bool - whether or not to print network architecture out
71
+ spatial - bool - whether to output an array containing varying distances across spatial dimensions
72
+ spatial_shape - if given, output spatial shape. if None then spatial shape is determined automatically via spatial_factor (see below).
73
+ spatial_factor - if given, specifies upsampling factor relative to the largest spatial extent of a convolutional layer. if None then resized to size of input images.
74
+ spatial_order - spline order of filter for upsampling in spatial mode, by default 1 (bilinear).
75
+ is_train - bool - [True] for training mode
76
+ lr - float - initial learning rate
77
+ beta1 - float - initial momentum term for adam
78
+ version - 0.1 for latest, 0.0 was original (with a bug)
79
+ gpu_ids - int array - [0] by default, gpus to use
80
+ '''
81
+ BaseModel.initialize(self, use_gpu=use_gpu, gpu_ids=gpu_ids)
82
+
83
+ self.model = model
84
+ self.net = net
85
+ self.is_train = is_train
86
+ self.spatial = spatial
87
+ self.gpu_ids = gpu_ids
88
+ self.model_name = '%s [%s]' % (model, net)
89
+
90
+ if(self.model == 'net-lin'): # pretrained net + linear layer
91
+ self.net = networks.PNetLin(pnet_rand=pnet_rand, pnet_tune=pnet_tune, pnet_type=net,
92
+ use_dropout=True, spatial=spatial, version=version, lpips=True)
93
+ kw = {}
94
+ if not use_gpu:
95
+ kw['map_location'] = 'cpu'
96
+ if(model_path is None):
97
+ model_path = get_path('weights/v%s/%s.pth' % (version, net))
98
+
99
+ if(not is_train):
100
+ print('Loading model from: %s' % model_path)
101
+ self.net.load_state_dict(torch.load(model_path, **kw), strict=False)
102
+
103
+ elif(self.model == 'net'): # pretrained network
104
+ self.net = networks.PNetLin(pnet_rand=pnet_rand, pnet_type=net, lpips=False)
105
+ elif(self.model in ['L2', 'l2']):
106
+ self.net = networks.L2(use_gpu=use_gpu, colorspace=colorspace) # not really a network, only for testing
107
+ self.model_name = 'L2'
108
+ elif(self.model in ['DSSIM', 'dssim', 'SSIM', 'ssim']):
109
+ self.net = networks.DSSIM(use_gpu=use_gpu, colorspace=colorspace)
110
+ self.model_name = 'SSIM'
111
+ else:
112
+ raise ValueError("Model [%s] not recognized." % self.model)
113
+
114
+ self.parameters = list(self.net.parameters())
115
+
116
+ if self.is_train: # training mode
117
+ # extra network on top to go from distances (d0,d1) => predicted human judgment (h*)
118
+ self.rankLoss = networks.BCERankingLoss()
119
+ self.parameters += list(self.rankLoss.net.parameters())
120
+ self.lr = lr
121
+ self.old_lr = lr
122
+ self.optimizer_net = torch.optim.Adam(self.parameters, lr=lr, betas=(beta1, 0.999))
123
+ else: # test mode
124
+ self.net.eval()
125
+
126
+ if(use_gpu):
127
+ self.net.to(gpu_ids[0])
128
+ self.net = torch.nn.DataParallel(self.net, device_ids=gpu_ids)
129
+ if(self.is_train):
130
+ self.rankLoss = self.rankLoss.to(device=gpu_ids[0]) # just put this on GPU0
131
+
132
+ if(printNet):
133
+ print('---------- Networks initialized -------------')
134
+ networks.print_network(self.net)
135
+ print('-----------------------------------------------')
136
+
137
+ def forward(self, in0, in1, retPerLayer=False):
138
+ ''' Function computes the distance between image patches in0 and in1
139
+ INPUTS
140
+ in0, in1 - torch.Tensor object of shape Nx3xXxY - image patch scaled to [-1,1]
141
+ OUTPUT
142
+ computed distances between in0 and in1
143
+ '''
144
+
145
+ return self.net.forward(in0, in1, retPerLayer=retPerLayer)
146
+
147
+ # ***** TRAINING FUNCTIONS *****
148
+ def optimize_parameters(self):
149
+ self.forward_train()
150
+ self.optimizer_net.zero_grad()
151
+ self.backward_train()
152
+ self.optimizer_net.step()
153
+ self.clamp_weights()
154
+
155
+ def clamp_weights(self):
156
+ for module in self.net.modules():
157
+ if(hasattr(module, 'weight') and module.kernel_size == (1, 1)):
158
+ module.weight.data = torch.clamp(module.weight.data, min=0)
159
+
160
+ def set_input(self, data):
161
+ self.input_ref = data['ref']
162
+ self.input_p0 = data['p0']
163
+ self.input_p1 = data['p1']
164
+ self.input_judge = data['judge']
165
+
166
+ if(self.use_gpu):
167
+ self.input_ref = self.input_ref.to(device=self.gpu_ids[0])
168
+ self.input_p0 = self.input_p0.to(device=self.gpu_ids[0])
169
+ self.input_p1 = self.input_p1.to(device=self.gpu_ids[0])
170
+ self.input_judge = self.input_judge.to(device=self.gpu_ids[0])
171
+
172
+ self.var_ref = Variable(self.input_ref, requires_grad=True)
173
+ self.var_p0 = Variable(self.input_p0, requires_grad=True)
174
+ self.var_p1 = Variable(self.input_p1, requires_grad=True)
175
+
176
+ def forward_train(self): # run forward pass
177
+ # print(self.net.module.scaling_layer.shift)
178
+ # print(torch.norm(self.net.module.net.slice1[0].weight).item(), torch.norm(self.net.module.lin0.model[1].weight).item())
179
+
180
+ self.d0 = self.forward(self.var_ref, self.var_p0)
181
+ self.d1 = self.forward(self.var_ref, self.var_p1)
182
+ self.acc_r = self.compute_accuracy(self.d0, self.d1, self.input_judge)
183
+
184
+ self.var_judge = Variable(1. * self.input_judge).view(self.d0.size())
185
+
186
+ self.loss_total = self.rankLoss.forward(self.d0, self.d1, self.var_judge * 2. - 1.)
187
+
188
+ return self.loss_total
189
+
190
+ def backward_train(self):
191
+ torch.mean(self.loss_total).backward()
192
+
193
+ def compute_accuracy(self, d0, d1, judge):
194
+ ''' d0, d1 are Variables, judge is a Tensor '''
195
+ d1_lt_d0 = (d1 < d0).cpu().data.numpy().flatten()
196
+ judge_per = judge.cpu().numpy().flatten()
197
+ return d1_lt_d0 * judge_per + (1 - d1_lt_d0) * (1 - judge_per)
198
+
199
+ def get_current_errors(self):
200
+ retDict = OrderedDict([('loss_total', self.loss_total.data.cpu().numpy()),
201
+ ('acc_r', self.acc_r)])
202
+
203
+ for key in retDict.keys():
204
+ retDict[key] = np.mean(retDict[key])
205
+
206
+ return retDict
207
+
208
+ def get_current_visuals(self):
209
+ zoom_factor = 256 / self.var_ref.data.size()[2]
210
+
211
+ ref_img = util.tensor2im(self.var_ref.data)
212
+ p0_img = util.tensor2im(self.var_p0.data)
213
+ p1_img = util.tensor2im(self.var_p1.data)
214
+
215
+ ref_img_vis = zoom(ref_img, [zoom_factor, zoom_factor, 1], order=0)
216
+ p0_img_vis = zoom(p0_img, [zoom_factor, zoom_factor, 1], order=0)
217
+ p1_img_vis = zoom(p1_img, [zoom_factor, zoom_factor, 1], order=0)
218
+
219
+ return OrderedDict([('ref', ref_img_vis),
220
+ ('p0', p0_img_vis),
221
+ ('p1', p1_img_vis)])
222
+
223
+ def save(self, path, label):
224
+ if(self.use_gpu):
225
+ self.save_network(self.net.module, path, '', label)
226
+ else:
227
+ self.save_network(self.net, path, '', label)
228
+ self.save_network(self.rankLoss.net, path, 'rank', label)
229
+
230
+ def update_learning_rate(self, nepoch_decay):
231
+ lrd = self.lr / nepoch_decay
232
+ lr = self.old_lr - lrd
233
+
234
+ for param_group in self.optimizer_net.param_groups:
235
+ param_group['lr'] = lr
236
+
237
+ print('update lr [%s] decay: %f -> %f' % (type, self.old_lr, lr))
238
+ self.old_lr = lr
239
+
240
+
241
+ def score_2afc_dataset(data_loader, func, name=''):
242
+ ''' Function computes Two Alternative Forced Choice (2AFC) score using
243
+ distance function 'func' in dataset 'data_loader'
244
+ INPUTS
245
+ data_loader - CustomDatasetDataLoader object - contains a TwoAFCDataset inside
246
+ func - callable distance function - calling d=func(in0,in1) should take 2
247
+ pytorch tensors with shape Nx3xXxY, and return numpy array of length N
248
+ OUTPUTS
249
+ [0] - 2AFC score in [0,1], fraction of time func agrees with human evaluators
250
+ [1] - dictionary with following elements
251
+ d0s,d1s - N arrays containing distances between reference patch to perturbed patches
252
+ gts - N array in [0,1], preferred patch selected by human evaluators
253
+ (closer to "0" for left patch p0, "1" for right patch p1,
254
+ "0.6" means 60pct people preferred right patch, 40pct preferred left)
255
+ scores - N array in [0,1], corresponding to what percentage function agreed with humans
256
+ CONSTS
257
+ N - number of test triplets in data_loader
258
+ '''
259
+
260
+ d0s = []
261
+ d1s = []
262
+ gts = []
263
+
264
+ for data in tqdm(data_loader.load_data(), desc=name):
265
+ d0s += func(data['ref'], data['p0']).data.cpu().numpy().flatten().tolist()
266
+ d1s += func(data['ref'], data['p1']).data.cpu().numpy().flatten().tolist()
267
+ gts += data['judge'].cpu().numpy().flatten().tolist()
268
+
269
+ d0s = np.array(d0s)
270
+ d1s = np.array(d1s)
271
+ gts = np.array(gts)
272
+ scores = (d0s < d1s) * (1. - gts) + (d1s < d0s) * gts + (d1s == d0s) * .5
273
+
274
+ return(np.mean(scores), dict(d0s=d0s, d1s=d1s, gts=gts, scores=scores))
275
+
276
+
277
+ def score_jnd_dataset(data_loader, func, name=''):
278
+ ''' Function computes JND score using distance function 'func' in dataset 'data_loader'
279
+ INPUTS
280
+ data_loader - CustomDatasetDataLoader object - contains a JNDDataset inside
281
+ func - callable distance function - calling d=func(in0,in1) should take 2
282
+ pytorch tensors with shape Nx3xXxY, and return pytorch array of length N
283
+ OUTPUTS
284
+ [0] - JND score in [0,1], mAP score (area under precision-recall curve)
285
+ [1] - dictionary with following elements
286
+ ds - N array containing distances between two patches shown to human evaluator
287
+ sames - N array containing fraction of people who thought the two patches were identical
288
+ CONSTS
289
+ N - number of test triplets in data_loader
290
+ '''
291
+
292
+ ds = []
293
+ gts = []
294
+
295
+ for data in tqdm(data_loader.load_data(), desc=name):
296
+ ds += func(data['p0'], data['p1']).data.cpu().numpy().tolist()
297
+ gts += data['same'].cpu().numpy().flatten().tolist()
298
+
299
+ sames = np.array(gts)
300
+ ds = np.array(ds)
301
+
302
+ sorted_inds = np.argsort(ds)
303
+ ds_sorted = ds[sorted_inds]
304
+ sames_sorted = sames[sorted_inds]
305
+
306
+ TPs = np.cumsum(sames_sorted)
307
+ FPs = np.cumsum(1 - sames_sorted)
308
+ FNs = np.sum(sames_sorted) - TPs
309
+
310
+ precs = TPs / (TPs + FPs)
311
+ recs = TPs / (TPs + FNs)
312
+ score = util.voc_ap(recs, precs)
313
+
314
+ return(score, dict(ds=ds, sames=sames))
stylegan2/lpips/networks_basic.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from __future__ import absolute_import
3
+
4
+ import sys
5
+ import torch
6
+ import torch.nn as nn
7
+ import torch.nn.init as init
8
+ from torch.autograd import Variable
9
+ import numpy as np
10
+ from pdb import set_trace as st
11
+ from skimage import color
12
+ from IPython import embed
13
+ from . import pretrained_networks as pn
14
+
15
+ from . import util
16
+
17
+
18
+ def spatial_average(in_tens, keepdim=True):
19
+ return in_tens.mean([2,3],keepdim=keepdim)
20
+
21
+ def upsample(in_tens, out_H=64): # assumes scale factor is same for H and W
22
+ in_H = in_tens.shape[2]
23
+ scale_factor = 1.*out_H/in_H
24
+
25
+ return nn.Upsample(scale_factor=scale_factor, mode='bilinear', align_corners=False)(in_tens)
26
+
27
+ # Learned perceptual metric
28
+ class PNetLin(nn.Module):
29
+ def __init__(self, pnet_type='vgg', pnet_rand=False, pnet_tune=False, use_dropout=True, spatial=False, version='0.1', lpips=True):
30
+ super(PNetLin, self).__init__()
31
+
32
+ self.pnet_type = pnet_type
33
+ self.pnet_tune = pnet_tune
34
+ self.pnet_rand = pnet_rand
35
+ self.spatial = spatial
36
+ self.lpips = lpips
37
+ self.version = version
38
+ self.scaling_layer = ScalingLayer()
39
+
40
+ if(self.pnet_type in ['vgg','vgg16']):
41
+ net_type = pn.vgg16
42
+ self.chns = [64,128,256,512,512]
43
+ elif(self.pnet_type=='alex'):
44
+ net_type = pn.alexnet
45
+ self.chns = [64,192,384,256,256]
46
+ elif(self.pnet_type=='squeeze'):
47
+ net_type = pn.squeezenet
48
+ self.chns = [64,128,256,384,384,512,512]
49
+ self.L = len(self.chns)
50
+
51
+ self.net = net_type(pretrained=not self.pnet_rand, requires_grad=self.pnet_tune)
52
+
53
+ if(lpips):
54
+ self.lin0 = NetLinLayer(self.chns[0], use_dropout=use_dropout)
55
+ self.lin1 = NetLinLayer(self.chns[1], use_dropout=use_dropout)
56
+ self.lin2 = NetLinLayer(self.chns[2], use_dropout=use_dropout)
57
+ self.lin3 = NetLinLayer(self.chns[3], use_dropout=use_dropout)
58
+ self.lin4 = NetLinLayer(self.chns[4], use_dropout=use_dropout)
59
+ self.lins = [self.lin0,self.lin1,self.lin2,self.lin3,self.lin4]
60
+ if(self.pnet_type=='squeeze'): # 7 layers for squeezenet
61
+ self.lin5 = NetLinLayer(self.chns[5], use_dropout=use_dropout)
62
+ self.lin6 = NetLinLayer(self.chns[6], use_dropout=use_dropout)
63
+ self.lins+=[self.lin5,self.lin6]
64
+
65
+ def forward(self, in0, in1, retPerLayer=False):
66
+ # v0.0 - original release had a bug, where input was not scaled
67
+ in0_input, in1_input = (self.scaling_layer(in0), self.scaling_layer(in1)) if self.version=='0.1' else (in0, in1)
68
+ outs0, outs1 = self.net.forward(in0_input), self.net.forward(in1_input)
69
+ feats0, feats1, diffs = {}, {}, {}
70
+
71
+ for kk in range(self.L):
72
+ feats0[kk], feats1[kk] = util.normalize_tensor(outs0[kk]), util.normalize_tensor(outs1[kk])
73
+ diffs[kk] = (feats0[kk]-feats1[kk])**2
74
+
75
+ if(self.lpips):
76
+ if(self.spatial):
77
+ res = [upsample(self.lins[kk].model(diffs[kk]), out_H=in0.shape[2]) for kk in range(self.L)]
78
+ else:
79
+ res = [spatial_average(self.lins[kk].model(diffs[kk]), keepdim=True) for kk in range(self.L)]
80
+ else:
81
+ if(self.spatial):
82
+ res = [upsample(diffs[kk].sum(dim=1,keepdim=True), out_H=in0.shape[2]) for kk in range(self.L)]
83
+ else:
84
+ res = [spatial_average(diffs[kk].sum(dim=1,keepdim=True), keepdim=True) for kk in range(self.L)]
85
+
86
+ val = res[0]
87
+ for l in range(1,self.L):
88
+ val += res[l]
89
+
90
+ if(retPerLayer):
91
+ return (val, res)
92
+ else:
93
+ return val
94
+
95
+ class ScalingLayer(nn.Module):
96
+ def __init__(self):
97
+ super(ScalingLayer, self).__init__()
98
+ self.register_buffer('shift', torch.Tensor([-.030,-.088,-.188])[None,:,None,None])
99
+ self.register_buffer('scale', torch.Tensor([.458,.448,.450])[None,:,None,None])
100
+
101
+ def forward(self, inp):
102
+ return (inp - self.shift) / self.scale
103
+
104
+
105
+ class NetLinLayer(nn.Module):
106
+ ''' A single linear layer which does a 1x1 conv '''
107
+ def __init__(self, chn_in, chn_out=1, use_dropout=False):
108
+ super(NetLinLayer, self).__init__()
109
+
110
+ layers = [nn.Dropout(),] if(use_dropout) else []
111
+ layers += [nn.Conv2d(chn_in, chn_out, 1, stride=1, padding=0, bias=False),]
112
+ self.model = nn.Sequential(*layers)
113
+
114
+
115
+ class Dist2LogitLayer(nn.Module):
116
+ ''' takes 2 distances, puts through fc layers, spits out value between [0,1] (if use_sigmoid is True) '''
117
+ def __init__(self, chn_mid=32, use_sigmoid=True):
118
+ super(Dist2LogitLayer, self).__init__()
119
+
120
+ layers = [nn.Conv2d(5, chn_mid, 1, stride=1, padding=0, bias=True),]
121
+ layers += [nn.LeakyReLU(0.2,True),]
122
+ layers += [nn.Conv2d(chn_mid, chn_mid, 1, stride=1, padding=0, bias=True),]
123
+ layers += [nn.LeakyReLU(0.2,True),]
124
+ layers += [nn.Conv2d(chn_mid, 1, 1, stride=1, padding=0, bias=True),]
125
+ if(use_sigmoid):
126
+ layers += [nn.Sigmoid(),]
127
+ self.model = nn.Sequential(*layers)
128
+
129
+ def forward(self,d0,d1,eps=0.1):
130
+ return self.model.forward(torch.cat((d0,d1,d0-d1,d0/(d1+eps),d1/(d0+eps)),dim=1))
131
+
132
+ class BCERankingLoss(nn.Module):
133
+ def __init__(self, chn_mid=32):
134
+ super(BCERankingLoss, self).__init__()
135
+ self.net = Dist2LogitLayer(chn_mid=chn_mid)
136
+ # self.parameters = list(self.net.parameters())
137
+ self.loss = torch.nn.BCELoss()
138
+
139
+ def forward(self, d0, d1, judge):
140
+ per = (judge+1.)/2.
141
+ self.logit = self.net.forward(d0,d1)
142
+ return self.loss(self.logit, per)
143
+
144
+ # L2, DSSIM metrics
145
+ class FakeNet(nn.Module):
146
+ def __init__(self, use_gpu=True, colorspace='Lab'):
147
+ super(FakeNet, self).__init__()
148
+ self.use_gpu = use_gpu
149
+ self.colorspace=colorspace
150
+
151
+ class L2(FakeNet):
152
+
153
+ def forward(self, in0, in1, retPerLayer=None):
154
+ assert(in0.size()[0]==1) # currently only supports batchSize 1
155
+
156
+ if(self.colorspace=='RGB'):
157
+ (N,C,X,Y) = in0.size()
158
+ value = torch.mean(torch.mean(torch.mean((in0-in1)**2,dim=1).view(N,1,X,Y),dim=2).view(N,1,1,Y),dim=3).view(N)
159
+ return value
160
+ elif(self.colorspace=='Lab'):
161
+ value = util.l2(util.tensor2np(util.tensor2tensorlab(in0.data,to_norm=False)),
162
+ util.tensor2np(util.tensor2tensorlab(in1.data,to_norm=False)), range=100.).astype('float')
163
+ ret_var = Variable( torch.Tensor((value,) ) )
164
+ if(self.use_gpu):
165
+ ret_var = ret_var.cuda()
166
+ return ret_var
167
+
168
+ class DSSIM(FakeNet):
169
+
170
+ def forward(self, in0, in1, retPerLayer=None):
171
+ assert(in0.size()[0]==1) # currently only supports batchSize 1
172
+
173
+ if(self.colorspace=='RGB'):
174
+ value = util.dssim(1.*util.tensor2im(in0.data), 1.*util.tensor2im(in1.data), range=255.).astype('float')
175
+ elif(self.colorspace=='Lab'):
176
+ value = util.dssim(util.tensor2np(util.tensor2tensorlab(in0.data,to_norm=False)),
177
+ util.tensor2np(util.tensor2tensorlab(in1.data,to_norm=False)), range=100.).astype('float')
178
+ ret_var = Variable( torch.Tensor((value,) ) )
179
+ if(self.use_gpu):
180
+ ret_var = ret_var.cuda()
181
+ return ret_var
182
+
183
+ def print_network(net):
184
+ num_params = 0
185
+ for param in net.parameters():
186
+ num_params += param.numel()
187
+ print('Network',net)
188
+ print('Total number of parameters: %d' % num_params)
stylegan2/lpips/pretrained_networks.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections import namedtuple
2
+ import torch
3
+ from torchvision import models as tv
4
+ from IPython import embed
5
+
6
+ class squeezenet(torch.nn.Module):
7
+ def __init__(self, requires_grad=False, pretrained=True):
8
+ super(squeezenet, self).__init__()
9
+ pretrained_features = tv.squeezenet1_1(pretrained=pretrained).features
10
+ self.slice1 = torch.nn.Sequential()
11
+ self.slice2 = torch.nn.Sequential()
12
+ self.slice3 = torch.nn.Sequential()
13
+ self.slice4 = torch.nn.Sequential()
14
+ self.slice5 = torch.nn.Sequential()
15
+ self.slice6 = torch.nn.Sequential()
16
+ self.slice7 = torch.nn.Sequential()
17
+ self.N_slices = 7
18
+ for x in range(2):
19
+ self.slice1.add_module(str(x), pretrained_features[x])
20
+ for x in range(2,5):
21
+ self.slice2.add_module(str(x), pretrained_features[x])
22
+ for x in range(5, 8):
23
+ self.slice3.add_module(str(x), pretrained_features[x])
24
+ for x in range(8, 10):
25
+ self.slice4.add_module(str(x), pretrained_features[x])
26
+ for x in range(10, 11):
27
+ self.slice5.add_module(str(x), pretrained_features[x])
28
+ for x in range(11, 12):
29
+ self.slice6.add_module(str(x), pretrained_features[x])
30
+ for x in range(12, 13):
31
+ self.slice7.add_module(str(x), pretrained_features[x])
32
+ if not requires_grad:
33
+ for param in self.parameters():
34
+ param.requires_grad = False
35
+
36
+ def forward(self, X):
37
+ h = self.slice1(X)
38
+ h_relu1 = h
39
+ h = self.slice2(h)
40
+ h_relu2 = h
41
+ h = self.slice3(h)
42
+ h_relu3 = h
43
+ h = self.slice4(h)
44
+ h_relu4 = h
45
+ h = self.slice5(h)
46
+ h_relu5 = h
47
+ h = self.slice6(h)
48
+ h_relu6 = h
49
+ h = self.slice7(h)
50
+ h_relu7 = h
51
+ vgg_outputs = namedtuple("SqueezeOutputs", ['relu1','relu2','relu3','relu4','relu5','relu6','relu7'])
52
+ out = vgg_outputs(h_relu1,h_relu2,h_relu3,h_relu4,h_relu5,h_relu6,h_relu7)
53
+
54
+ return out
55
+
56
+
57
+ class alexnet(torch.nn.Module):
58
+ def __init__(self, requires_grad=False, pretrained=True):
59
+ super(alexnet, self).__init__()
60
+ alexnet_pretrained_features = tv.alexnet(pretrained=pretrained).features
61
+ self.slice1 = torch.nn.Sequential()
62
+ self.slice2 = torch.nn.Sequential()
63
+ self.slice3 = torch.nn.Sequential()
64
+ self.slice4 = torch.nn.Sequential()
65
+ self.slice5 = torch.nn.Sequential()
66
+ self.N_slices = 5
67
+ for x in range(2):
68
+ self.slice1.add_module(str(x), alexnet_pretrained_features[x])
69
+ for x in range(2, 5):
70
+ self.slice2.add_module(str(x), alexnet_pretrained_features[x])
71
+ for x in range(5, 8):
72
+ self.slice3.add_module(str(x), alexnet_pretrained_features[x])
73
+ for x in range(8, 10):
74
+ self.slice4.add_module(str(x), alexnet_pretrained_features[x])
75
+ for x in range(10, 12):
76
+ self.slice5.add_module(str(x), alexnet_pretrained_features[x])
77
+ if not requires_grad:
78
+ for param in self.parameters():
79
+ param.requires_grad = False
80
+
81
+ def forward(self, X):
82
+ h = self.slice1(X)
83
+ h_relu1 = h
84
+ h = self.slice2(h)
85
+ h_relu2 = h
86
+ h = self.slice3(h)
87
+ h_relu3 = h
88
+ h = self.slice4(h)
89
+ h_relu4 = h
90
+ h = self.slice5(h)
91
+ h_relu5 = h
92
+ alexnet_outputs = namedtuple("AlexnetOutputs", ['relu1', 'relu2', 'relu3', 'relu4', 'relu5'])
93
+ out = alexnet_outputs(h_relu1, h_relu2, h_relu3, h_relu4, h_relu5)
94
+
95
+ return out
96
+
97
+ class vgg16(torch.nn.Module):
98
+ def __init__(self, requires_grad=False, pretrained=True):
99
+ super(vgg16, self).__init__()
100
+ vgg_pretrained_features = tv.vgg16(pretrained=pretrained).features
101
+ self.slice1 = torch.nn.Sequential()
102
+ self.slice2 = torch.nn.Sequential()
103
+ self.slice3 = torch.nn.Sequential()
104
+ self.slice4 = torch.nn.Sequential()
105
+ self.slice5 = torch.nn.Sequential()
106
+ self.N_slices = 5
107
+ for x in range(4):
108
+ self.slice1.add_module(str(x), vgg_pretrained_features[x])
109
+ for x in range(4, 9):
110
+ self.slice2.add_module(str(x), vgg_pretrained_features[x])
111
+ for x in range(9, 16):
112
+ self.slice3.add_module(str(x), vgg_pretrained_features[x])
113
+ for x in range(16, 23):
114
+ self.slice4.add_module(str(x), vgg_pretrained_features[x])
115
+ for x in range(23, 30):
116
+ self.slice5.add_module(str(x), vgg_pretrained_features[x])
117
+ if not requires_grad:
118
+ for param in self.parameters():
119
+ param.requires_grad = False
120
+
121
+ def forward(self, X):
122
+ h = self.slice1(X)
123
+ h_relu1_2 = h
124
+ h = self.slice2(h)
125
+ h_relu2_2 = h
126
+ h = self.slice3(h)
127
+ h_relu3_3 = h
128
+ h = self.slice4(h)
129
+ h_relu4_3 = h
130
+ h = self.slice5(h)
131
+ h_relu5_3 = h
132
+ vgg_outputs = namedtuple("VggOutputs", ['relu1_2', 'relu2_2', 'relu3_3', 'relu4_3', 'relu5_3'])
133
+ out = vgg_outputs(h_relu1_2, h_relu2_2, h_relu3_3, h_relu4_3, h_relu5_3)
134
+
135
+ return out
136
+
137
+
138
+
139
+ class resnet(torch.nn.Module):
140
+ def __init__(self, requires_grad=False, pretrained=True, num=18):
141
+ super(resnet, self).__init__()
142
+ if(num==18):
143
+ self.net = tv.resnet18(pretrained=pretrained)
144
+ elif(num==34):
145
+ self.net = tv.resnet34(pretrained=pretrained)
146
+ elif(num==50):
147
+ self.net = tv.resnet50(pretrained=pretrained)
148
+ elif(num==101):
149
+ self.net = tv.resnet101(pretrained=pretrained)
150
+ elif(num==152):
151
+ self.net = tv.resnet152(pretrained=pretrained)
152
+ self.N_slices = 5
153
+
154
+ self.conv1 = self.net.conv1
155
+ self.bn1 = self.net.bn1
156
+ self.relu = self.net.relu
157
+ self.maxpool = self.net.maxpool
158
+ self.layer1 = self.net.layer1
159
+ self.layer2 = self.net.layer2
160
+ self.layer3 = self.net.layer3
161
+ self.layer4 = self.net.layer4
162
+
163
+ def forward(self, X):
164
+ h = self.conv1(X)
165
+ h = self.bn1(h)
166
+ h = self.relu(h)
167
+ h_relu1 = h
168
+ h = self.maxpool(h)
169
+ h = self.layer1(h)
170
+ h_conv2 = h
171
+ h = self.layer2(h)
172
+ h_conv3 = h
173
+ h = self.layer3(h)
174
+ h_conv4 = h
175
+ h = self.layer4(h)
176
+ h_conv5 = h
177
+
178
+ outputs = namedtuple("Outputs", ['relu1','conv2','conv3','conv4','conv5'])
179
+ out = outputs(h_relu1, h_conv2, h_conv3, h_conv4, h_conv5)
180
+
181
+ return out
stylegan2/lpips/util.py ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from __future__ import absolute_import
3
+ from __future__ import division
4
+ from __future__ import print_function
5
+
6
+ import numpy as np
7
+ from skimage.metrics import structural_similarity
8
+ import torch
9
+
10
+
11
+ from . import dist_model
12
+
13
+ class PerceptualLoss(torch.nn.Module):
14
+ def __init__(self, model='net-lin', net='alex', colorspace='rgb', spatial=False, use_gpu=True, gpu_ids=[0]): # VGG using our perceptually-learned weights (LPIPS metric)
15
+ # def __init__(self, model='net', net='vgg', use_gpu=True): # "default" way of using VGG as a perceptual loss
16
+ super(PerceptualLoss, self).__init__()
17
+ print('Setting up Perceptual loss...')
18
+ self.use_gpu = use_gpu
19
+ self.spatial = spatial
20
+ self.gpu_ids = gpu_ids
21
+ self.model = dist_model.DistModel()
22
+ self.model.initialize(model=model, net=net, use_gpu=use_gpu, colorspace=colorspace, spatial=self.spatial, gpu_ids=gpu_ids)
23
+ print('...[%s] initialized'%self.model.name())
24
+ print('...Done')
25
+
26
+ def forward(self, pred, target, normalize=False):
27
+ """
28
+ Pred and target are Variables.
29
+ If normalize is True, assumes the images are between [0,1] and then scales them between [-1,+1]
30
+ If normalize is False, assumes the images are already between [-1,+1]
31
+
32
+ Inputs pred and target are Nx3xHxW
33
+ Output pytorch Variable N long
34
+ """
35
+
36
+ if normalize:
37
+ target = 2 * target - 1
38
+ pred = 2 * pred - 1
39
+
40
+ return self.model.forward(target, pred)
41
+
42
+ def normalize_tensor(in_feat,eps=1e-10):
43
+ norm_factor = torch.sqrt(torch.sum(in_feat**2,dim=1,keepdim=True))
44
+ return in_feat/(norm_factor+eps)
45
+
46
+ def l2(p0, p1, range=255.):
47
+ return .5*np.mean((p0 / range - p1 / range)**2)
48
+
49
+ def psnr(p0, p1, peak=255.):
50
+ return 10*np.log10(peak**2/np.mean((1.*p0-1.*p1)**2))
51
+
52
+ def dssim(p0, p1, range=255.):
53
+ return (1 - structural_similarity(p0, p1, data_range=range, multichannel=True)) / 2.
54
+
55
+ def rgb2lab(in_img,mean_cent=False):
56
+ from skimage import color
57
+ img_lab = color.rgb2lab(in_img)
58
+ if(mean_cent):
59
+ img_lab[:,:,0] = img_lab[:,:,0]-50
60
+ return img_lab
61
+
62
+ def tensor2np(tensor_obj):
63
+ # change dimension of a tensor object into a numpy array
64
+ return tensor_obj[0].cpu().float().numpy().transpose((1,2,0))
65
+
66
+ def np2tensor(np_obj):
67
+ # change dimenion of np array into tensor array
68
+ return torch.Tensor(np_obj[:, :, :, np.newaxis].transpose((3, 2, 0, 1)))
69
+
70
+ def tensor2tensorlab(image_tensor,to_norm=True,mc_only=False):
71
+ # image tensor to lab tensor
72
+ from skimage import color
73
+
74
+ img = tensor2im(image_tensor)
75
+ img_lab = color.rgb2lab(img)
76
+ if(mc_only):
77
+ img_lab[:,:,0] = img_lab[:,:,0]-50
78
+ if(to_norm and not mc_only):
79
+ img_lab[:,:,0] = img_lab[:,:,0]-50
80
+ img_lab = img_lab/100.
81
+
82
+ return np2tensor(img_lab)
83
+
84
+ def tensorlab2tensor(lab_tensor,return_inbnd=False):
85
+ from skimage import color
86
+ import warnings
87
+ warnings.filterwarnings("ignore")
88
+
89
+ lab = tensor2np(lab_tensor)*100.
90
+ lab[:,:,0] = lab[:,:,0]+50
91
+
92
+ rgb_back = 255.*np.clip(color.lab2rgb(lab.astype('float')),0,1)
93
+ if(return_inbnd):
94
+ # convert back to lab, see if we match
95
+ lab_back = color.rgb2lab(rgb_back.astype('uint8'))
96
+ mask = 1.*np.isclose(lab_back,lab,atol=2.)
97
+ mask = np2tensor(np.prod(mask,axis=2)[:,:,np.newaxis])
98
+ return (im2tensor(rgb_back),mask)
99
+ else:
100
+ return im2tensor(rgb_back)
101
+
102
+ def rgb2lab(input):
103
+ from skimage import color
104
+ return color.rgb2lab(input / 255.)
105
+
106
+ def tensor2im(image_tensor, imtype=np.uint8, cent=1., factor=255./2.):
107
+ image_numpy = image_tensor[0].cpu().float().numpy()
108
+ image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + cent) * factor
109
+ return image_numpy.astype(imtype)
110
+
111
+ def im2tensor(image, imtype=np.uint8, cent=1., factor=255./2.):
112
+ return torch.Tensor((image / factor - cent)
113
+ [:, :, :, np.newaxis].transpose((3, 2, 0, 1)))
114
+
115
+ def tensor2vec(vector_tensor):
116
+ return vector_tensor.data.cpu().numpy()[:, :, 0, 0]
117
+
118
+ def voc_ap(rec, prec, use_07_metric=False):
119
+ """ ap = voc_ap(rec, prec, [use_07_metric])
120
+ Compute VOC AP given precision and recall.
121
+ If use_07_metric is true, uses the
122
+ VOC 07 11-point method (default: False).
123
+ """
124
+ if use_07_metric:
125
+ # 11 point metric
126
+ ap = 0.
127
+ for t in np.arange(0., 1.1, 0.1):
128
+ if np.sum(rec >= t) == 0:
129
+ p = 0
130
+ else:
131
+ p = np.max(prec[rec >= t])
132
+ ap = ap + p / 11.
133
+ else:
134
+ # correct AP calculation
135
+ # first append sentinel values at the end
136
+ mrec = np.concatenate(([0.], rec, [1.]))
137
+ mpre = np.concatenate(([0.], prec, [0.]))
138
+
139
+ # compute the precision envelope
140
+ for i in range(mpre.size - 1, 0, -1):
141
+ mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
142
+
143
+ # to calculate area under PR curve, look for points
144
+ # where X axis (recall) changes value
145
+ i = np.where(mrec[1:] != mrec[:-1])[0]
146
+
147
+ # and sum (\Delta recall) * prec
148
+ ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
149
+ return ap
150
+
151
+ def tensor2im(image_tensor, imtype=np.uint8, cent=1., factor=255./2.):  # note: duplicate of tensor2im defined above
152
+ # def tensor2im(image_tensor, imtype=np.uint8, cent=1., factor=1.):
153
+ image_numpy = image_tensor[0].cpu().float().numpy()
154
+ image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + cent) * factor
155
+ return image_numpy.astype(imtype)
156
+
157
+ def im2tensor(image, imtype=np.uint8, cent=1., factor=255./2.):  # note: duplicate of im2tensor defined above
158
+ # def im2tensor(image, imtype=np.uint8, cent=1., factor=1.):
159
+ return torch.Tensor((image / factor - cent)
160
+ [:, :, :, np.newaxis].transpose((3, 2, 0, 1)))
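The helpers above belong to an LPIPS-style perceptual-distance module. As a rough usage sketch (hedged: the constructor arguments shown here are assumed from the initialize() call above, and the tensor sizes are illustrative), the PerceptualLoss module is driven like this:

import torch

# Assumed constructor arguments; forward() expects Nx3xHxW tensors.
loss_fn = PerceptualLoss(model='net-lin', net='alex', use_gpu=False)

pred = torch.rand(4, 3, 64, 64)     # images in [0, 1]
target = torch.rand(4, 3, 64, 64)

# normalize=True rescales [0, 1] inputs to [-1, +1] before the distance is computed,
# as documented in the forward() docstring; the result is one distance per image.
dist = loss_fn.forward(pred, target, normalize=True)
print(dist.shape)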
stylegan2/model.py CHANGED
@@ -5,7 +5,25 @@ import torch
5
  from torch import nn
6
  from torch.nn import functional as F
7
 
8
- from .op import FusedLeakyReLU, fused_leaky_relu, upfirdn2d, conv2d_gradfix
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
 
11
  class PixelNorm(nn.Module):
 
5
  from torch import nn
6
  from torch.nn import functional as F
7
 
8
+ from .op.fused_act import fused
9
+
10
+ if fused is not None:
11
+ from .op.fused_act import FusedLeakyReLU, fused_leaky_relu
12
+ else:
13
+ from .op import FusedLeakyReLU_Native as FusedLeakyReLU
14
+ from .op import fused_leaky_relu_native as fused_leaky_relu
15
+
16
+ from .op.upfirdn2d import upfirdn2d_op
17
+
18
+ if upfirdn2d_op is not None:
19
+ from .op.upfirdn2d import upfirdn2d
20
+ else:
21
+ from .op import upfirdn2d_native as upfirdn2d
22
+
23
+ from .op import conv2d_gradfix
24
+
25
+ # https://github.com/rosinality/stylegan2-pytorch/blob/master/op/upfirdn2d.py#L152
26
+ # https://github.com/rosinality/stylegan2-pytorch/issues/70
27
 
28
 
29
  class PixelNorm(nn.Module):
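The new import block falls back to the pure-PyTorch implementations whenever the CUDA extensions fail to build (see the linked upstream issue). A minimal sketch of how to confirm at runtime which backend was picked, using only the sentinels that the try/except loaders set to None on failure (assumes the package is importable as stylegan2):

from stylegan2.op.fused_act import fused
from stylegan2.op.upfirdn2d import upfirdn2d_op

# 'fused' and 'upfirdn2d_op' are None when torch.utils.cpp_extension.load() failed,
# in which case model.py aliases the *_native implementations instead.
print("fused_leaky_relu backend:", "CUDA extension" if fused is not None else "native PyTorch")
print("upfirdn2d backend:", "CUDA extension" if upfirdn2d_op is not None else "native PyTorch")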
stylegan2/op/__init__.py CHANGED
@@ -1,2 +1,2 @@
1
- from .fused_act import FusedLeakyReLU, fused_leaky_relu
2
- from .upfirdn2d import upfirdn2d
 
1
+ from .fused_act import FusedLeakyReLU, fused_leaky_relu, fused_leaky_relu_native, FusedLeakyReLU_Native
2
+ from .upfirdn2d import upfirdn2d, upfirdn2d_native
stylegan2/op/conv2d_gradfix.py CHANGED
@@ -76,6 +76,8 @@ def conv_transpose2d(
76
 
77
 
78
  def could_use_op(input):
 
 
79
  if (not enabled) or (not torch.backends.cudnn.enabled):
80
  return False
81
 
 
76
 
77
 
78
  def could_use_op(input):
79
+ return False  # always fall back to the standard conv ops; skips the cuDNN-specific path below
80
+
81
  if (not enabled) or (not torch.backends.cudnn.enabled):
82
  return False
83
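The early return False in could_use_op() disables the custom gradfix path entirely, so (in the upstream rosinality implementation this file is based on) conv2d and conv_transpose2d fall through to the standard torch.nn.functional ops. A hedged sanity check, assuming the wrapper keeps F.conv2d's signature and the package is importable as stylegan2:

import torch
from torch.nn import functional as F
from stylegan2.op import conv2d_gradfix

x = torch.randn(1, 3, 16, 16)
w = torch.randn(8, 3, 3, 3)

# With could_use_op() always False, the wrapper should match F.conv2d exactly.
print(torch.allclose(conv2d_gradfix.conv2d(x, w, padding=1), F.conv2d(x, w, padding=1)))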
 
stylegan2/op/fused_act.py CHANGED
@@ -6,15 +6,24 @@ from torch.nn import functional as F
6
  from torch.autograd import Function
7
  from torch.utils.cpp_extension import load
8
 
 
9
 
10
- module_path = os.path.dirname(__file__)
11
- fused = load(
12
- "fused",
13
- sources=[
14
- os.path.join(module_path, "fused_bias_act.cpp"),
15
- os.path.join(module_path, "fused_bias_act_kernel.cu"),
16
- ],
17
- )
 
 
 
 
 
 
 
 
18
 
19
 
20
  class FusedLeakyReLUFunctionBackward(Function):
@@ -125,3 +134,24 @@ def fused_leaky_relu(input, bias=None, negative_slope=0.2, scale=2 ** 0.5):
125
  return FusedLeakyReLUFunction.apply(
126
  input.contiguous(), bias, negative_slope, scale
127
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  from torch.autograd import Function
7
  from torch.utils.cpp_extension import load
8
 
9
+ import warnings
10
 
11
+ module_path = os.path.dirname(os.path.abspath(__file__))
12
+
13
+ try:
14
+ fused = load(
15
+ "fused",
16
+ sources=[
17
+ os.path.join(module_path, "fused_bias_act.cpp"),
18
+ os.path.join(module_path, "fused_bias_act_kernel.cu"),
19
+ ],
20
+ )
21
+ except Exception:
22
+ warnings.warn(
23
+ f"(This is not error) Switch to native implementation"
24
+ )
25
+
26
+ fused = None
27
 
28
 
29
  class FusedLeakyReLUFunctionBackward(Function):
 
134
  return FusedLeakyReLUFunction.apply(
135
  input.contiguous(), bias, negative_slope, scale
136
  )
137
+
138
+
139
+ class FusedLeakyReLU_Native(nn.Module):
140
+ def __init__(self, channel, bias=True, negative_slope=0.2, scale=2 ** 0.5):
141
+ super().__init__()
142
+
143
+ if bias:
144
+ self.bias = nn.Parameter(torch.zeros(channel))
145
+
146
+ else:
147
+ self.bias = None
148
+
149
+ self.negative_slope = negative_slope
150
+ self.scale = scale
151
+
152
+ def forward(self, input):
153
+ return fused_leaky_relu_native(input, self.bias, self.negative_slope, self.scale)
154
+
155
+
156
+ def fused_leaky_relu_native(input, bias=None, negative_slope=0.2, scale=2 ** 0.5):
157
+ if bias is None:  # matches fused_leaky_relu's signature; FusedLeakyReLU_Native(bias=False) stores None
+ return scale * F.leaky_relu(input, negative_slope=negative_slope)
+ return scale * F.leaky_relu(input + bias.view((1, -1) + (1,) * (len(input.shape) - 2)), negative_slope=negative_slope)
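FusedLeakyReLU_Native and fused_leaky_relu_native reproduce the fused CUDA kernel with plain PyTorch ops: add a per-channel bias, apply a leaky ReLU, and rescale by sqrt(2). A small sketch exercising the fallback on CPU (illustrative shapes; no compiled extension required):

import torch

act = FusedLeakyReLU_Native(channel=8)          # learnable per-channel bias, initialised to zero
x = torch.randn(2, 8, 16, 16)

# The module and the functional form agree when given the same (zero) bias.
y_mod = act(x)
y_fn = fused_leaky_relu_native(x, torch.zeros(8), negative_slope=0.2, scale=2 ** 0.5)
print(torch.allclose(y_mod, y_fn))              # expected: True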
stylegan2/op/upfirdn2d.py CHANGED
@@ -5,16 +5,24 @@ import torch
5
  from torch.nn import functional as F
6
  from torch.autograd import Function
7
  from torch.utils.cpp_extension import load
 
8
 
 
9
 
10
- module_path = os.path.dirname(__file__)
11
- upfirdn2d_op = load(
12
- "upfirdn2d",
13
- sources=[
14
- os.path.join(module_path, "upfirdn2d.cpp"),
15
- os.path.join(module_path, "upfirdn2d_kernel.cu"),
16
- ],
17
- )
 
 
 
 
 
 
18
 
19
 
20
  class UpFirDn2dBackward(Function):
@@ -157,7 +165,7 @@ def upfirdn2d(input, kernel, up=1, down=1, pad=(0, 0)):
157
  pad = (pad[0], pad[1], pad[0], pad[1])
158
 
159
  if input.device.type == "cpu":
160
- out = upfirdn2d_native(input, kernel, *up, *down, *pad)
161
 
162
  else:
163
  out = UpFirDn2d.apply(input, kernel, up, down, pad)
@@ -165,7 +173,22 @@ def upfirdn2d(input, kernel, up=1, down=1, pad=(0, 0)):
165
  return out
166
 
167
 
168
- def upfirdn2d_native(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
  input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1
170
  ):
171
  _, channel, in_h, in_w = input.shape
@@ -183,8 +206,8 @@ def upfirdn2d_native(
183
  )
184
  out = out[
185
  :,
186
- max(-pad_y0, 0) : out.shape[1] - max(-pad_y1, 0),
187
- max(-pad_x0, 0) : out.shape[2] - max(-pad_x1, 0),
188
  :,
189
  ]
190
 
 
5
  from torch.nn import functional as F
6
  from torch.autograd import Function
7
  from torch.utils.cpp_extension import load
8
+ import warnings
9
 
10
+ module_path = os.path.dirname(os.path.abspath(__file__))
11
 
12
+ try:
13
+ upfirdn2d_op = load(
14
+ "upfirdn2d",
15
+ sources=[
16
+ os.path.join(module_path, "upfirdn2d.cpp"),
17
+ os.path.join(module_path, "upfirdn2d_kernel.cu"),
18
+ ],
19
+ )
20
+ except Exception:
21
+ warnings.warn(
22
+ f"(This is not error) Switch to native implementation"
23
+ )
24
+
25
+ upfirdn2d_op = None
26
 
27
 
28
  class UpFirDn2dBackward(Function):
 
165
  pad = (pad[0], pad[1], pad[0], pad[1])
166
 
167
  if input.device.type == "cpu":
168
+ out = _upfirdn2d_native(input, kernel, *up, *down, *pad)
169
 
170
  else:
171
  out = UpFirDn2d.apply(input, kernel, up, down, pad)
 
173
  return out
174
 
175
 
176
+ def upfirdn2d_native(input, kernel, up=1, down=1, pad=(0, 0)):
177
+ if not isinstance(up, abc.Iterable):
178
+ up = (up, up)
179
+
180
+ if not isinstance(down, abc.Iterable):
181
+ down = (down, down)
182
+
183
+ if len(pad) == 2:
184
+ pad = (pad[0], pad[1], pad[0], pad[1])
185
+
186
+ out = _upfirdn2d_native(input, kernel, *up, *down, *pad)
187
+
188
+ return out
189
+
190
+
191
+ def _upfirdn2d_native(
192
  input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1
193
  ):
194
  _, channel, in_h, in_w = input.shape
 
206
  )
207
  out = out[
208
  :,
209
+ max(-pad_y0, 0): out.shape[1] - max(-pad_y1, 0),
210
+ max(-pad_x0, 0): out.shape[2] - max(-pad_x1, 0),
211
  :,
212
  ]
213
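The new upfirdn2d_native wrapper mirrors the public upfirdn2d signature (scalar or pair up/down factors, 2- or 4-element pad) and then dispatches to the renamed low-level _upfirdn2d_native routine. A hedged usage sketch, 2x upsampling with a normalized 2x2 box filter (shapes are illustrative):

import torch

x = torch.randn(1, 3, 8, 8)
kernel = torch.ones(2, 2) / 4.0                 # normalized box filter

# pad=(0, 1) keeps the output exactly twice the input resolution for a 2x2 kernel.
out = upfirdn2d_native(x, kernel, up=2, down=1, pad=(0, 1))
print(out.shape)                                # expected: torch.Size([1, 3, 16, 16])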