upload app

Files changed:
- .gitattributes +3 -0
- app.py +40 -0
- assets/examples/myself.jpeg +3 -0
- model/__init__.py +28 -0
- model/depth_estimation.py +38 -0
- model/segmentation.py +65 -0
- requirements.txt +7 -0
.gitattributes
CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.jpg filter=lfs diff=lfs merge=lfs -text
+*.jpeg filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
app.py
ADDED
@@ -0,0 +1,40 @@
+import gradio as gr
+from model import predict
+
+color_maps = [
+    'viridis', 'plasma', 'inferno', 'magma', 'cividis',
+    'Greys', 'Purples', 'Blues', 'Greens', 'Oranges', 'Reds',
+    'YlOrBr', 'YlOrRd', 'OrRd', 'PuRd', 'RdPu', 'BuPu',
+    'GnBu', 'PuBu', 'YlGnBu', 'PuBuGn', 'BuGn', 'YlGn',
+    'binary', 'gist_yarg', 'gist_gray', 'gray', 'bone',
+    'pink', 'spring', 'summer', 'autumn', 'winter', 'cool',
+    'Wistia', 'hot', 'afmhot', 'gist_heat', 'copper',
+    'PiYG', 'PRGn', 'BrBG', 'PuOr', 'RdGy', 'RdBu', 'RdYlBu',
+    'RdYlGn', 'Spectral', 'coolwarm', 'bwr', 'seismic',
+    'twilight', 'twilight_shifted', 'hsv',
+    'Pastel1', 'Pastel2', 'Paired', 'Accent', 'Dark2',
+    'Set1', 'Set2', 'Set3', 'tab10', 'tab20', 'tab20b', 'tab20c',
+    'flag', 'prism', 'ocean', 'gist_earth', 'terrain',
+    'gist_stern', 'gnuplot', 'gnuplot2', 'CMRmap',
+    'cubehelix', 'brg', 'gist_rainbow', 'rainbow', 'jet',
+    'turbo', 'nipy_spectral', 'gist_ncar',
+]
+examples = [
+    ["assets/examples/myself.jpeg", "afmhot"],
+    ["assets/examples/myself.jpeg", "inferno"],
+]
+
+interface = gr.Interface(
+    fn=predict,
+    inputs=[
+        gr.Image(type="pil"),
+        gr.Dropdown(choices=color_maps),
+    ],
+    outputs=gr.Image(type="pil"),
+    title="DepthPro: Colorify",
+    description="Applies segmentation on the input image, then creates the depth map and finally colorizes it.",
+    examples=examples,
+)
+
+if __name__ == "__main__":
+    interface.launch()
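A quick end-to-end smoke test of the app's `predict` (a sketch, not part of the commit; assumes the repo root is the working directory and the dependencies from requirements.txt are installed):

    from PIL import Image
    from model import predict

    # run the full pipeline once: segmentation -> depth estimation -> colormap
    image = Image.open("assets/examples/myself.jpeg")
    colored = predict(image, "inferno")
    colored.save("depth_colored.png")  # output path is arbitrary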
assets/examples/myself.jpeg
ADDED
(binary image, stored via Git LFS)
model/__init__.py
ADDED
@@ -0,0 +1,28 @@
+from PIL import Image
+import numpy as np
+import matplotlib.pyplot as plt
+from .segmentation import predict as segmentation_predict
+from .depth_estimation import predict as depth_estimation_predict
+
+def predict(image, color_map):
+    # inference
+
+    mask_image = segmentation_predict(image)
+
+    segmented_image = Image.composite(
+        image,
+        Image.new("RGB", image.size, (0, 0, 0)),
+        mask_image.convert("L")
+    )
+
+    depth_image = depth_estimation_predict(segmented_image)
+
+    # apply matplotlib colormap (e.g. viridis)
+    depth_array = np.array(depth_image)  # single-channel PIL image -> 2D NumPy array
+    colormap = plt.get_cmap(color_map)  # look up the chosen colormap
+    depth_colored = colormap(depth_array / 255.0)  # normalize to [0, 1], apply colormap (RGBA floats)
+    depth_colored = (depth_colored[..., :3] * 255).astype(np.uint8)  # drop alpha, scale to 8-bit RGB
+
+    depth_colored = Image.fromarray(depth_colored)
+
+    return depth_colored
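The colorizing step can be sanity-checked in isolation with a synthetic depth map (a sketch; the gradient shape and the "viridis" choice are arbitrary):

    import numpy as np
    import matplotlib.pyplot as plt
    from PIL import Image

    # fake 2D "depth map": a horizontal 0..255 gradient, 64 rows tall
    depth_array = np.tile(np.arange(256, dtype=np.uint8), (64, 1))
    rgba = plt.get_cmap("viridis")(depth_array / 255.0)  # (64, 256, 4) floats in [0, 1]
    rgb = (rgba[..., :3] * 255).astype(np.uint8)         # drop alpha, scale to 8-bit
    Image.fromarray(rgb).save("gradient_viridis.png")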
model/depth_estimation.py
ADDED
@@ -0,0 +1,38 @@
+from PIL import Image
+import torch
+
+# custom installation from this PR: https://github.com/huggingface/transformers/pull/34583
+# !pip install git+https://github.com/geetu040/transformers.git@depth-pro-projects#egg=transformers
+from transformers import DepthProImageProcessorFast, DepthProForDepthEstimation
+
+# initialize processor and model
+checkpoint = "geetu040/DepthPro"
+revision = "project"
+image_processor = DepthProImageProcessorFast.from_pretrained(checkpoint, revision=revision)
+model = DepthProForDepthEstimation.from_pretrained(checkpoint, revision=revision)
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model = model.to(device)
+
+def predict(image):
+    # inference
+
+    # prepare image for the model
+    inputs = image_processor(images=image, return_tensors="pt")
+    inputs = {k: v.to(device) for k, v in inputs.items()}
+
+    with torch.no_grad():
+        outputs = model(**inputs)
+
+    # interpolate to original size
+    post_processed_output = image_processor.post_process_depth_estimation(
+        outputs, target_sizes=[(image.height, image.width)],
+    )
+
+    # visualize the prediction
+    depth = post_processed_output[0]["predicted_depth"]
+    depth = (depth - depth.min()) / (depth.max() - depth.min())  # min-max normalize to [0, 1]
+    depth = depth * 255.
+    depth = depth.detach().cpu().numpy()
+    depth = Image.fromarray(depth.astype("uint8"))
+
+    return depth
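The module can also be exercised on its own (a sketch; assumes the custom transformers fork referenced above is installed and the example image exists):

    from PIL import Image
    from model.depth_estimation import predict

    image = Image.open("assets/examples/myself.jpeg").convert("RGB")
    depth = predict(image)         # single-channel PIL image, same size as the input
    print(depth.size, depth.mode)  # expected: (width, height) and "L"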
model/segmentation.py
ADDED
@@ -0,0 +1,65 @@
+from PIL import Image
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from huggingface_hub import hf_hub_download
+
+# custom installation from this PR: https://github.com/huggingface/transformers/pull/34583
+# !pip install git+https://github.com/geetu040/transformers.git@depth-pro-projects#egg=transformers
+from transformers import DepthProConfig, DepthProImageProcessorFast, DepthProForDepthEstimation
+
+# initialize model
+config = DepthProConfig(use_fov_model=False)
+model = DepthProForDepthEstimation(config)
+features = config.fusion_hidden_size
+semantic_classifier_dropout = 0.1
+num_labels = 1
+model.head.head = nn.Sequential(  # swap the depth head for a 1-channel segmentation head
+    nn.Conv2d(features, features, kernel_size=3, padding=1, bias=False),
+    nn.BatchNorm2d(features),
+    nn.ReLU(),
+    nn.Dropout(semantic_classifier_dropout),
+    nn.Conv2d(features, features, kernel_size=1),
+    nn.ConvTranspose2d(features, num_labels, kernel_size=2, stride=2, padding=0, bias=True),
+)
+
+# load weights
+weights_path = hf_hub_download(repo_id="geetu040/DepthPro_Segmentation_Human", filename="model_weights.pth")
+model.load_state_dict(torch.load(weights_path, map_location=torch.device('cpu'), weights_only=True))
+
+# load to device
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model = model.to(device).eval()  # eval mode: disable dropout, use BatchNorm running stats
+
+# load image processor
+image_processor = DepthProImageProcessorFast()
+
+def predict(image):
+    # inference
+
+    image = image.convert("RGB")
+
+    # prepare image for the model
+    inputs = image_processor(images=image, return_tensors="pt")
+    inputs = {k: v.to(device) for k, v in inputs.items()}
+
+    # inference
+    with torch.no_grad():
+        output = model(**inputs)
+
+    # convert tensors to PIL.Image
+    output = output[0]  # get output logits
+    output = F.interpolate(
+        output.unsqueeze(0),
+        size=(image.height, image.width)
+    )  # interpolate to match input size
+    output = output.squeeze()  # drop the batch and channel dims
+    output = output.sigmoid()  # apply sigmoid for binary segmentation
+    output = (output > 0.5).float()  # threshold to create a binary mask
+    output = output.cpu()  # move off the GPU if used
+    output = output * 255  # scale [0, 1] to [0, 255]
+    output = output.numpy()  # convert to NumPy
+    output = output.astype('uint8')  # PIL-compatible dtype
+    output = Image.fromarray(output)  # create the PIL.Image mask
+
+    return output
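The mask can be inspected on its own before it is composited in model/__init__.py (a sketch; same example image as above):

    from PIL import Image
    from model.segmentation import predict

    image = Image.open("assets/examples/myself.jpeg")
    mask = predict(image)  # black/white PIL mask, sized like the input
    # cut the person out against a black background, as model/__init__.py does
    cutout = Image.composite(image.convert("RGB"),
                             Image.new("RGB", image.size, (0, 0, 0)),
                             mask.convert("L"))
    cutout.save("cutout.png")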
requirements.txt
ADDED
@@ -0,0 +1,7 @@
+gradio
+numpy
+matplotlib
+pillow
+torch
+torchvision
+git+https://github.com/geetu040/transformers.git@depth-pro-projects#egg=transformers