# Ryan Burgert 2024

# Setup:
#     Run this in a Jupyter Notebook on a computer with at least one GPU
#         `sudo apt install ffmpeg git`
#         `pip install rp`
#     The first time you run this it might be a bit slow (it will download necessary models)
#     The `rp` package will take care of installing the rest of the python packages for you

import rp

# Automatically install missing packages (no interactive confirmation prompt)
rp.r._pip_import_autoyes=True

# NOTE: the order below matters - `fire` and `CommonSource` are fetched by rp
# (if missing) before the regular import statements that depend on them run.
rp.pip_import('fire')
rp.git_import('CommonSource') #If missing, installs code from https://github.com/RyannDaGreat/CommonSource
import rp.git.CommonSource.noise_warp as nw
import fire


def main(video:str, output_folder:str):
    """
    Compute warped noise for a video and write the results to a new folder.

    The video is stretched or squashed to 49 frames and resized + center-cropped
    to height=480, width=720 (CogVideoX's dimensions). Warped noise with 16
    channels is then calculated at latent resolution (i.e. 1/8 of the width and
    height). The warped noise, optical flows, and related preview videos and
    images are saved to the output folder, along with `first_frame.png` and
    `input.mp4`.

    The main file you need is `noises.npy` in the output folder, which holds
    the gaussian noises in (H,W,C) form.

    Args:
        video: A video URL or filepath. Anything that is not a string is used
            as-is, as the already-loaded video frames.
        output_folder: Path of the folder to write into. It must not exist yet.

    Raises:
        RuntimeError: If `output_folder` already exists.
    """

    # Refuse to write into an existing folder so nothing gets overwritten
    if rp.folder_exists(output_folder):
        raise RuntimeError(
            f"The given output_folder={output_folder!r} already exists! "
            "To avoid clobbering what might be in there, please specify a folder "
            "that doesn't exist so I can create one for you. "
            "Alternatively, you could delete that folder if you don't care whats in it."
        )

    # We immediately resize the input frames by this factor, before calculating optical flow.
    # The flow is calculated at (input size) × frame_scale resolution.
    # A higher value results in slower optical flow calculation and higher intermediate noise resolution.
    # Larger is not always better - watch the preview in Jupyter to see if it looks good!
    frame_scale = 0.5

    # Then, we use bilinear interpolation to upscale the flow by this factor.
    # We warp the noise at (input size) × frame_scale × flow_scale resolution.
    # The noise is then downsampled back to (input size).
    # A higher value results in more temporally consistent noise warping,
    # at the cost of higher VRAM usage and slower inference time.
    flow_scale = 8

    # We further downsample the outputs by this amount - because 8 pixels wide
    # corresponds to one latent wide in Stable Diffusion.
    # The final output size is (input size) ÷ latent_factor regardless of the two scales above.
    # Set this to 1 for a prettier visualization! But for latent diffusion models, use 8.
    latent_factor = 8

    # CogVideoX's clip length and resolution
    frame_count = 49
    target_height = 480
    target_width = 720

    # Example arguments (video files or URLs both work):
    #     video = "https://www.shutterstock.com/shutterstock/videos/1100085499/preview/stock-footage-bremen-germany-october-old-style-carousel-moving-on-square-in-city-horses-on-traditional.webm"
    #     output_folder = "NoiseWarpOutputFolder"

    # Strings are loaded as a path/URL; anything else is taken to be the frames themselves
    frames = rp.load_video(video) if isinstance(video, str) else video

    # Preprocess the video
    frames = rp.resize_list(frames, length=frame_count)  # Stretch or squash video to 49 frames
    frames = rp.resize_images_to_hold(frames, height=target_height, width=target_width)
    frames = rp.crop_images(frames, height=target_height, width=target_width, origin='center')  # Make the resolution 480x720
    frames = rp.as_numpy_array(frames)

    # Undo the frame_scale × flow_scale upscaling, then shrink by latent_factor on top of that
    total_downscale = round(frame_scale * flow_scale) * latent_factor

    # See nw.get_noise_from_video's docstring for more information!
    output = nw.get_noise_from_video(
        frames,
        remove_background=False,  # Set this to True to matte the foreground - and force the background to have no flow
        visualize=True,  # Generates nice visualization videos and previews in Jupyter notebook
        save_files=True,  # Set this to False if you just want the noises without saving to a numpy file
        noise_channels=16,
        output_folder=output_folder,
        resize_frames=frame_scale,
        resize_flow=flow_scale,
        downscale_factor=total_downscale,
    )

    # Save the preprocessed input next to the noise outputs, for reference
    first_frame_file = rp.path_join(output_folder, 'first_frame.png')
    output.first_frame_path = rp.save_image(frames[0], first_frame_file)

    input_video_file = rp.path_join(output_folder, 'input.mp4')
    rp.save_video_mp4(frames, input_video_file, framerate=12, video_bitrate='max')

    # NOTE: an extra 1/8-downsampled copy of the noises ('noises_downsampled.npy',
    # via nw.resize_noise) was sketched here at one point but is not enabled.

    print("Noise shape:", output.numpy_noises.shape)
    print("Flow shape:", output.numpy_flows.shape)
    print("Output folder:", output.output_folder)


if __name__ == "__main__":
    fire.Fire(main)