| import argparse |
| import os |
| from contextlib import nullcontext |
|
|
| import torch |
| from PIL import Image |
| from tqdm import tqdm |
| from transparent_background import Remover |
|
|
| from spar3d.models.mesh import QUAD_REMESH_AVAILABLE, TRIANGLE_REMESH_AVAILABLE |
| from spar3d.system import SPAR3D |
| from spar3d.utils import foreground_crop, get_device, remove_background |
|
|
|
|
def check_positive(value):
    """Argparse ``type=`` callback: parse *value* as a strictly positive int.

    Returns the parsed integer when it is > 0; raises
    ``argparse.ArgumentTypeError`` otherwise. A non-numeric string raises
    ``ValueError`` from ``int()``, which argparse also reports as a usage error.
    """
    parsed = int(value)
    if parsed > 0:
        return parsed
    raise argparse.ArgumentTypeError("%s is an invalid positive int value" % value)
|
|
|
|
if __name__ == "__main__":
    # CLI entry point: preprocess input image(s), run SPAR3D inference in
    # batches, and export one mesh + point cloud per input image.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "image", type=str, nargs="+", help="Path to input image(s) or folder."
    )
    parser.add_argument(
        "--device",
        default=get_device(),
        type=str,
        help=f"Device to use. If no CUDA/MPS-compatible device is found, the baking will fail. Default: '{get_device()}'",
    )
    parser.add_argument(
        "--pretrained-model",
        default="stabilityai/stable-point-aware-3d",
        type=str,
        help="Path to the pretrained model. Could be either a huggingface model id or a local path. Default: 'stabilityai/stable-point-aware-3d'",
    )
    parser.add_argument(
        "--foreground-ratio",
        default=1.3,
        type=float,
        # Help text now states the actual default (it previously claimed 0.85).
        help="Ratio of the foreground size to the image size. Only used when --no-remove-bg is not specified. Default: 1.3",
    )
    parser.add_argument(
        "--output-dir",
        default="output/",
        type=str,
        help="Output directory to save the results. Default: 'output/'",
    )
    parser.add_argument(
        "--texture-resolution",
        default=1024,
        type=int,
        help="Texture atlas resolution. Default: 1024",
    )
    parser.add_argument(
        "--low-vram-mode",
        action="store_true",
        help=(
            "Use low VRAM mode. SPAR3D consumes 10.5GB of VRAM by default. "
            "This mode will reduce the VRAM consumption to roughly 7GB but in exchange "
            "the model will be slower. Default: False"
        ),
    )

    # Remeshing backends are optional extras; only offer the choices that are
    # actually available in this installation.
    remesh_choices = ["none"]
    if TRIANGLE_REMESH_AVAILABLE:
        remesh_choices.append("triangle")
    if QUAD_REMESH_AVAILABLE:
        remesh_choices.append("quad")
    parser.add_argument(
        "--remesh_option",
        choices=remesh_choices,
        default="none",
        help="Remeshing option",
    )
    if TRIANGLE_REMESH_AVAILABLE or QUAD_REMESH_AVAILABLE:
        parser.add_argument(
            "--reduction_count_type",
            choices=["keep", "vertex", "faces"],
            default="keep",
            help="Vertex count type",
        )
        parser.add_argument(
            "--target_count",
            type=check_positive,
            help="Selected target count.",
            default=2000,
        )
    parser.add_argument(
        "--batch_size", default=1, type=int, help="Batch size for inference"
    )
    args = parser.parse_args()

    # Validate the requested device. Strip an optional ordinal suffix so that
    # e.g. "cuda:1" is accepted. (The previous substring test accepted junk
    # like "cu" while rejecting "cuda:0".)
    devices = ["cuda", "mps", "cpu"]
    if args.device.split(":")[0] not in devices:
        raise ValueError("Invalid device. Use cuda, mps or cpu")

    output_dir = args.output_dir
    os.makedirs(output_dir, exist_ok=True)

    # Fall back to CPU when no accelerator is actually usable at runtime.
    device = args.device
    if not (torch.cuda.is_available() or torch.backends.mps.is_available()):
        device = "cpu"

    print("Device used: ", device)

    model = SPAR3D.from_pretrained(
        args.pretrained_model,
        config_name="config.yaml",
        weight_name="model.safetensors",
        low_vram_mode=args.low_vram_mode,
    )
    model.to(device)
    model.eval()

    bg_remover = Remover(device=device)
    images = []
    idx = 0

    def handle_image(image_path, idx):
        """Remove the background, crop to the foreground, save the
        preprocessed input to <output_dir>/<idx>/input.png, and queue the
        image for inference."""
        image = remove_background(
            Image.open(image_path).convert("RGBA"), bg_remover
        )
        image = foreground_crop(image, args.foreground_ratio)
        os.makedirs(os.path.join(output_dir, str(idx)), exist_ok=True)
        image.save(os.path.join(output_dir, str(idx), "input.png"))
        images.append(image)

    for image_path in args.image:
        if os.path.isdir(image_path):
            # A directory argument expands to every image file it contains.
            # Use a distinct loop variable so the outer one is not shadowed.
            contained_paths = [
                os.path.join(image_path, f)
                for f in os.listdir(image_path)
                if f.endswith((".png", ".jpg", ".jpeg"))
            ]
            for contained_path in contained_paths:
                handle_image(contained_path, idx)
                idx += 1
        else:
            handle_image(image_path, idx)
            idx += 1

    # Resolve the target vertex count. The reduction arguments only exist
    # when a remesh backend is available (see the conditional add_argument
    # above), so read them defensively with the same defaults.
    reduction_count_type = getattr(args, "reduction_count_type", "keep")
    target_count = getattr(args, "target_count", 2000)
    if reduction_count_type == "keep":
        vertex_count = -1  # sentinel: keep the original vertex count
    elif reduction_count_type == "vertex":
        vertex_count = target_count
    else:
        # "faces": approximately two faces per vertex, so halve the target.
        vertex_count = target_count // 2

    for i in tqdm(range(0, len(images), args.batch_size)):
        image = images[i : i + args.batch_size]
        if torch.cuda.is_available():
            torch.cuda.reset_peak_memory_stats()
        with torch.no_grad():
            # autocast's device_type must be the bare backend name ("cuda"),
            # even when the user passed an ordinal like "cuda:0".
            with (
                torch.autocast(device_type="cuda", dtype=torch.bfloat16)
                if "cuda" in device
                else nullcontext()
            ):
                mesh, glob_dict = model.run_image(
                    image,
                    bake_resolution=args.texture_resolution,
                    remesh=args.remesh_option,
                    vertex_count=vertex_count,
                    return_points=True,
                )
        if torch.cuda.is_available():
            print("Peak Memory:", torch.cuda.max_memory_allocated() / 1024 / 1024, "MB")
        elif torch.backends.mps.is_available():
            print(
                "Peak Memory:", torch.mps.driver_allocated_memory() / 1024 / 1024, "MB"
            )

        # Export one mesh + point cloud per input image; output folders are
        # indexed to match the preprocessing step above.
        if len(image) == 1:
            out_mesh_path = os.path.join(output_dir, str(i), "mesh.glb")
            mesh.export(out_mesh_path, include_normals=True)
            out_points_path = os.path.join(output_dir, str(i), "points.ply")
            glob_dict["point_clouds"][0].export(out_points_path)
        else:
            for j in range(len(mesh)):
                out_mesh_path = os.path.join(output_dir, str(i + j), "mesh.glb")
                mesh[j].export(out_mesh_path, include_normals=True)
                out_points_path = os.path.join(output_dir, str(i + j), "points.ply")
                glob_dict["point_clouds"][j].export(out_points_path)
|