CHEN11102
/

AI

Model card Files Files and versions Community

File size: 6,352 Bytes

1772f26

# Copyright 2022 Google LLC

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     https://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Beam pipeline that generates Vimeo-90K (train or test) triplet TFRecords.

Vimeo-90K dataset is built upon 5,846 videos downloaded from vimeo.com. The list
of the original video links are available here:
https://github.com/anchen1011/toflow/blob/master/data/original_vimeo_links.txt.
Each video is further cropped into a fixed spatial size of (448 x 256) to create
89,000 video clips.

The Vimeo-90K dataset is designed for four video processing tasks. This script
creates the TFRecords of frame triplets for frame interpolation task.

Temporal frame interpolation triplet dataset:
  - 73,171 triplets of size (448x256) extracted from 15K subsets of Vimeo-90K.
  - The triplets are pre-split into (train,test) = (51313,3782)
  - Download links:
    Test-set: http://data.csail.mit.edu/tofu/testset/vimeo_interp_test.zip
    Train+test-set: http://data.csail.mit.edu/tofu/dataset/vimeo_triplet.zip

For more information, see the arXiv paper, project page or the GitHub link.
@article{xue17toflow,
  author = {Xue, Tianfan and
            Chen, Baian and
            Wu, Jiajun and
            Wei, Donglai and
            Freeman, William T},
  title = {Video Enhancement with Task-Oriented Flow},
  journal = {arXiv},
  year = {2017}
}
Project: http://toflow.csail.mit.edu/
GitHub: https://github.com/anchen1011/toflow

Inputs to the script are (1) the directory to the downloaded and unzipped folder
(2) the filepath of the text-file that lists the subfolders of the triplets.

Output TFRecord is a tf.train.Example proto of each image triplet.
The feature_map takes the form:
  feature_map {
      'frame_0/encoded':
          tf.io.FixedLenFeature((), tf.string, default_value=''),
      'frame_0/format':
          tf.io.FixedLenFeature((), tf.string, default_value='jpg'),
      'frame_0/height':
          tf.io.FixedLenFeature((), tf.int64, default_value=0),
      'frame_0/width':
          tf.io.FixedLenFeature((), tf.int64, default_value=0),
      'frame_1/encoded':
          tf.io.FixedLenFeature((), tf.string, default_value=''),
      'frame_1/format':
          tf.io.FixedLenFeature((), tf.string, default_value='jpg'),
      'frame_1/height':
          tf.io.FixedLenFeature((), tf.int64, default_value=0),
      'frame_1/width':
          tf.io.FixedLenFeature((), tf.int64, default_value=0),
      'frame_2/encoded':
          tf.io.FixedLenFeature((), tf.string, default_value=''),
      'frame_2/format':
          tf.io.FixedLenFeature((), tf.string, default_value='jpg'),
      'frame_2/height':
          tf.io.FixedLenFeature((), tf.int64, default_value=0),
      'frame_2/width':
          tf.io.FixedLenFeature((), tf.int64, default_value=0)
      'path':
          tf.io.FixedLenFeature((), tf.string, default_value='')
  }

Usage example:
  python3 -m frame_interpolation.datasets.create_vimeo90K_tfrecord \
    --input_dir=<root folder of vimeo90K dataset> \
    --input_triplet_list_filepath=<filepath of tri_{test|train}list.txt> \
    --output_tfrecord_filepath=<output tfrecord filepath>
"""
import os

from . import util
from absl import app
from absl import flags
from absl import logging
import apache_beam as beam
import numpy as np
import tensorflow as tf


_INPUT_DIR = flags.DEFINE_string(
    'input_dir',
    default='/path/to/raw_vimeo_interp/sequences',
    help='Path to the root directory of the vimeo frame interpolation dataset. '
    'We expect the data to have been downloaded and unzipped.\n'
    'Folder structures:\n'
    '| raw_vimeo_dataset/\n'
    '|  sequences/\n'
    '|  |  00001\n'
    '|  |  |  0389/\n'
    '|  |  |  |  im1.png\n'
    '|  |  |  |  im2.png\n'
    '|  |  |  |  im3.png\n'
    '|  |  |  ...\n'
    '|  |  00002/\n'
    '|  |  ...\n'
    '|  readme.txt\n'
    '|  tri_trainlist.txt\n'
    '|  tri_testlist.txt \n')

_INTPUT_TRIPLET_LIST_FILEPATH = flags.DEFINE_string(
    'input_triplet_list_filepath',
    default='/path/to/raw_vimeo_dataset/tri_{test|train}list.txt',
    help='Text file containing a list of sub-directories of input triplets.')

_OUTPUT_TFRECORD_FILEPATH = flags.DEFINE_string(
    'output_tfrecord_filepath',
    default=None,
    help='Filepath to the output TFRecord file.')

_NUM_SHARDS = flags.DEFINE_integer('num_shards',
    default=200, # set to 3 for vimeo_test, and 200 for vimeo_train.
    help='Number of shards used for the output.')

# Image key -> basename for frame interpolator: start / middle / end frames.
_INTERPOLATOR_IMAGES_MAP = {
    'frame_0': 'im1.png',
    'frame_1': 'im2.png',
    'frame_2': 'im3.png',
}


def main(unused_argv):
  """Creates and runs a Beam pipeline to write frame triplets as a TFRecord."""
  with tf.io.gfile.GFile(_INTPUT_TRIPLET_LIST_FILEPATH.value, 'r') as fid:
    triplets_list = np.loadtxt(fid, dtype=str)

  triplet_dicts = []
  for triplet in triplets_list:
    triplet_dict = {
        image_key: os.path.join(_INPUT_DIR.value, triplet, image_basename)
        for image_key, image_basename in _INTERPOLATOR_IMAGES_MAP.items()
    }
    triplet_dicts.append(triplet_dict)
  p = beam.Pipeline('DirectRunner')
  (p | 'ReadInputTripletDicts' >> beam.Create(triplet_dicts)  # pylint: disable=expression-not-assigned
   | 'GenerateSingleExample' >> beam.ParDo(
       util.ExampleGenerator(_INTERPOLATOR_IMAGES_MAP))
   | 'WriteToTFRecord' >> beam.io.tfrecordio.WriteToTFRecord(
       file_path_prefix=_OUTPUT_TFRECORD_FILEPATH.value,
       num_shards=_NUM_SHARDS.value,
       coder=beam.coders.BytesCoder()))
  result = p.run()
  result.wait_until_finish()

  logging.info('Succeeded in creating the output TFRecord file: \'%s@%s\'.',
    _OUTPUT_TFRECORD_FILEPATH.value, str(_NUM_SHARDS.value))

if __name__ == '__main__':
  app.run(main)