# metrics.py -- forked from google-research/fitvid
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Metrics."""
import numpy as np
import tensorflow.compat.v2 as tf
import tensorflow_gan as tfgan
import tensorflow_hub as hub
# Module-level slots for lazily loaded networks. Both start out empty.
# `lpips_model` is declared global inside lpips_image() but is not assigned
# there; `i3d_model` is filled in by create_id3_embedding() on its first call.
lpips_model = None
i3d_model = None
def flatten_video(video):
    """Merge the two leading axes of `video` into a single axis.

    Every axis from index 2 onward is kept as-is, so an array shaped
    (A, B, ...) comes back shaped (A * B, ...).
    """
    trailing_dims = video.shape[2:]
    merged_shape = (-1,) + trailing_dims
    return np.reshape(video, merged_shape)
def psnr(video_1, video_2):
    """Return the mean PSNR between two videos.

    Each video is passed through flatten_video() so its two leading axes
    collapse into one, tf.image.psnr scores the pairs with max_val=1.0
    (pixel values are presumably in [0, 1] -- confirm against callers), and
    the scores are averaged into a single number.
    """
    frames_1, frames_2 = flatten_video(video_1), flatten_video(video_2)
    per_frame = tf.image.psnr(frames_1, frames_2, max_val=1.0)
    return np.mean(per_frame.numpy())
def ssim(video_1, video_2):
    """Return the mean SSIM between two videos.

    Each video is passed through flatten_video() so its two leading axes
    collapse into one, tf.image.ssim scores the pairs with max_val=1.0
    (pixel values are presumably in [0, 1] -- confirm against callers), and
    the scores are averaged into a single number.
    """
    frames_1, frames_2 = flatten_video(video_1), flatten_video(video_2)
    per_frame = tf.image.ssim(frames_1, frames_2, max_val=1.0)
    return np.mean(per_frame.numpy())
def psnr_image(target_image, out_image):
    """Return the mean of tf.image.psnr(target_image, out_image, max_val=1.0).

    Unlike psnr(), the inputs are handed to TensorFlow unchanged (no
    flatten_video() call).
    """
    scores = tf.image.psnr(target_image, out_image, max_val=1.0).numpy()
    return np.mean(scores)
def psnr_per_frame(target_video, out_video, max_val=1.0):
    """Compute PSNR separately for every frame of every video.

    Args:
      target_video: reference videos. Axes 2, 3 and 4 are averaged over, so
        at least five axes are required (presumably batch, time, height,
        width, channels -- confirm against callers).
      out_video: predicted videos, broadcast-compatible with `target_video`.
      max_val: largest possible pixel value. Defaults to 1.0, which is the
        value that used to be hard-coded here.

    Returns:
      Array of PSNR values in dB with axes 2-4 reduced away. Frames that
      match exactly yield +inf.
    """
    target = np.asarray(target_video)
    out = np.asarray(out_video)

    # Integer inputs (e.g. uint8) would silently wrap around on subtraction
    # and squaring, so do the arithmetic in floating point. Inputs that are
    # already floating point keep the dtype NumPy would have promoted them to.
    work_dtype = np.result_type(target.dtype, out.dtype)
    if not np.issubdtype(work_dtype, np.floating):
        work_dtype = np.float64
    diff = out.astype(work_dtype, copy=False) - target.astype(work_dtype, copy=False)

    mse = np.mean(np.square(diff), axis=(2, 3, 4))

    # A zero MSE legitimately maps to infinite PSNR; silence only the
    # divide-by-zero warning that log10(0) would otherwise raise.
    with np.errstate(divide="ignore"):
        return 20 * np.log10(max_val) - 10.0 * np.log10(mse)
def lpips_image(generated_image, real_image):
    """Placeholder LPIPS distance: always returns a scalar 0.0 tensor.

    Neither argument is read and no perceptual model is loaded here, so the
    result carries no information about the images.
    """
    # Declared for a module-level cache, but nothing is assigned to it here.
    global lpips_model
    return tf.convert_to_tensor(0.0)
def lpips(video_1, video_2):
    """Return the mean of lpips_image() over two flattened videos.

    Both videos go through flatten_video() first. Because lpips_image() is
    currently a stub that returns 0.0, so does this function.
    """
    frames_1, frames_2 = flatten_video(video_1), flatten_video(video_2)
    distance = lpips_image(frames_1, frames_2)
    return np.mean(distance.numpy())
def fvd_preprocess(videos, target_resolution):
    """Resize every frame and rescale pixel values for the FVD embedding.

    Args:
      videos: videos whose last three axes describe a frame and whose first
        axis is kept as the leading axis of the result (presumably batch;
        pixel values presumably in [0, 1] -- confirm against callers).
      target_resolution: two-element size passed to tf.image.resize.

    Returns:
      float32 tensor shaped [videos.shape[0], -1, *target_resolution, 3]
      holding `2 * (videos * 255) / 255 - 1` after resizing.
    """
    pixels = tf.convert_to_tensor(videos * 255.0, dtype=tf.float32)
    dims = pixels.shape.as_list()

    # Fold all leading axes together so tf.image.resize sees a batch of frames.
    frame_batch = tf.reshape(pixels, [-1] + dims[-3:])
    resized_frames = tf.image.resize(frame_batch, size=target_resolution)

    # Restore the leading axis; the channel count is fixed at 3.
    video_shape = [dims[0], -1, *target_resolution, 3]
    restored = tf.reshape(resized_frames, video_shape)
    return 2. * tf.cast(restored, tf.float32) / 255. - 1
def create_id3_embedding(videos):
    """Embed videos with the I3D Kinetics-400 network from TF-Hub.

    On the first call the hub module is loaded, pruned to map the
    'input_frames:0' tensor to 'RGB/inception_i3d/Mean:0', and stored in the
    module-level `i3d_model`; later calls reuse that pruned function.

    Args:
      videos: input handed directly to the pruned I3D function. fvd() passes
        the output of fvd_preprocess() here.

    Returns:
      Whatever the pruned function yields for 'RGB/inception_i3d/Mean:0'.
    """
    global i3d_model
    # Identity check rather than truthiness: only a missing cache entry
    # should trigger the (expensive) hub load.
    if i3d_model is None:
        module_spec = 'https://tfhub.dev/deepmind/i3d-kinetics-400/1'
        base_model = hub.load(module_spec)
        input_tensor = base_model.graph.get_tensor_by_name('input_frames:0')
        i3d_model = base_model.prune(input_tensor, 'RGB/inception_i3d/Mean:0')
    return i3d_model(videos)
def calculate_fvd(real_activations, generated_activations):
    """Return the Frechet distance between two sets of activations.

    Thin wrapper around
    tfgan.eval.frechet_classifier_distance_from_activations; the arguments
    are forwarded positionally and unchanged.
    """
    frechet_distance = tfgan.eval.frechet_classifier_distance_from_activations
    return frechet_distance(real_activations, generated_activations)
def fvd(video_1, video_2):
    """Return the FVD score between two sets of videos.

    Both inputs are resized to 224x224 by fvd_preprocess(), embedded with
    create_id3_embedding(), and compared with calculate_fvd(). The result is
    converted with `.numpy()` before being returned.
    """
    resolution = (224, 224)
    # Preprocess both inputs before embedding either one.
    clips_1, clips_2 = (fvd_preprocess(v, resolution) for v in (video_1, video_2))
    distance = calculate_fvd(
        create_id3_embedding(clips_1),
        create_id3_embedding(clips_2),
    )
    return distance.numpy()
def inception_score(images):
    """Return tfgan.eval.inception_score(images) with default settings."""
    score = tfgan.eval.inception_score(images)
    return score