diff --git a/docs/changelog.md b/docs/changelog.md
index 1627e771f2..5ff41d85c4 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -5,6 +5,7 @@
 **Highlights**

 **New Features**
+- Support the data pre-processing pipeline for the HVU Dataset ([#277](https://github.com/open-mmlab/mmaction2/pull/277/))
 - Support to run real-time action recognition from web camera ([#171](https://github.com/open-mmlab/mmaction2/pull/171))
 - Support to export pytorch models to onnx ([#160](https://github.com/open-mmlab/mmaction2/pull/160))
 - Support UCF101-24 preparation ([#219](https://github.com/open-mmlab/mmaction2/pull/219))
diff --git a/docs/data_preparation.md b/docs/data_preparation.md
index faa927e8e3..2ac07c3220 100644
--- a/docs/data_preparation.md
+++ b/docs/data_preparation.md
@@ -22,9 +22,10 @@ To ease usage, we provide tutorials of data deployment for each dataset.
 - [Something-Something V2](https://20bn.com/datasets/something-something): See [preparing_sthv2.md](/tools/data/sthv2/preparing_sthv2.md)
 - [Moments in Time](http://moments.csail.mit.edu/): See [preparing_mit.md](/tools/data/mit/preparing_mit.md)
 - [Multi-Moments in Time](http://moments.csail.mit.edu/challenge_iccv_2019.html): See [preparing_mmit.md](/tools/data/mmit/preparing_mmit.md)
-- ActivityNet_feature: See [praparing_activitynet.md](/tools/data/activitynet/preparing_activitynet.md)
+- [ActivityNet](http://activity-net.org/): See [preparing_activitynet.md](/tools/data/activitynet/preparing_activitynet.md)
 - [UCF101-24](http://www.thumos.info/download.html): See [preparing_ucf101_24.md](/tools/data/ucf101_24/preparing_ucf101_24.md)
 - [JHMDB](http://jhmdb.is.tue.mpg.de/): See [preparing_jhmdb](/tools/data/jhmdb/preparing_jhmdb.md)
+- [HVU](https://github.com/holistic-video-understanding/HVU-Dataset): See [preparing_hvu.md](/tools/data/hvu/preparing_hvu.md)

 Now, you can switch to [getting_started.md](getting_started.md) to train and test the model.
diff --git a/tools/data/hvu/download.py b/tools/data/hvu/download.py
new file mode 100644
index 0000000000..19fd8ad0c4
--- /dev/null
+++ b/tools/data/hvu/download.py
@@ -0,0 +1,200 @@
+# ------------------------------------------------------------------------------
+# Adapted from https://github.com/activitynet/ActivityNet/
+# Original licence: Copyright (c) Microsoft, under the MIT License.
+# ------------------------------------------------------------------------------
+
+import argparse
+import glob
+import os
+import shutil
+import ssl
+import subprocess
+import uuid
+
+import mmcv
+from joblib import Parallel, delayed
+
+ssl._create_default_https_context = ssl._create_unverified_context
+args = None
+
+
+def create_video_folders(dataset, output_dir, tmp_dir):
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+    if not os.path.exists(tmp_dir):
+        os.makedirs(tmp_dir)
+
+
+def construct_video_filename(item, trim_format, output_dir):
+    """Given a dataset row, this function constructs the output filename for a
+    given video."""
+    youtube_id, start_time, end_time = item
+    start_time, end_time = int(start_time * 10), int(end_time * 10)
+    basename = '%s_%s_%s.mp4' % (youtube_id, trim_format % start_time,
+                                 trim_format % end_time)
+    output_filename = os.path.join(output_dir, basename)
+    return output_filename
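+
+
+# Illustrative example (hypothetical id): with trim_format='%06d',
+# construct_video_filename(('abcdefghijk', 1.0, 11.0), '%06d', 'out') returns
+# 'out/abcdefghijk_000010_000110.mp4'; the times carry one decimal place, so
+# they are multiplied by 10 to keep the filenames integral.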
+
+
+def download_clip(video_identifier,
+                  output_filename,
+                  start_time,
+                  end_time,
+                  tmp_dir='/tmp/hvu',
+                  num_attempts=5,
+                  url_base='https://www.youtube.com/watch?v='):
+    """Download a video from YouTube if it exists and is not blocked.
+
+    arguments:
+    ---------
+    video_identifier: str
+        Unique YouTube video identifier (11 characters)
+    output_filename: str
+        File path where the video will be stored.
+    start_time: float
+        Indicates the beginning time in seconds from where the video
+        will be trimmed.
+    end_time: float
+        Indicates the ending time in seconds of the trimmed video.
+    """
+    # Defensive argument checking.
+    assert isinstance(video_identifier, str), 'video_identifier must be string'
+    assert isinstance(output_filename, str), 'output_filename must be string'
+    assert len(video_identifier) == 11, 'video_identifier must have length 11'
+
+    status = False
+    tmp_filename = os.path.join(tmp_dir, '%s.%%(ext)s' % uuid.uuid4())
+
+    if not os.path.exists(output_filename):
+        if not os.path.exists(tmp_filename):
+            command = [
+                'youtube-dl', '--quiet', '--no-warnings',
+                '--no-check-certificate', '-f', 'mp4', '-o',
+                '"%s"' % tmp_filename,
+                '"%s"' % (url_base + video_identifier)
+            ]
+            command = ' '.join(command)
+            print(command)
+            attempts = 0
+            while True:
+                try:
+                    subprocess.check_output(
+                        command, shell=True, stderr=subprocess.STDOUT)
+                except subprocess.CalledProcessError:
+                    attempts += 1
+                    if attempts == num_attempts:
+                        return status, 'Downloading Failed'
+                else:
+                    break
+
+        tmp_filename = glob.glob('%s*' % tmp_filename.split('.')[0])[0]
+        # Construct command to trim the videos (ffmpeg required).
+        command = [
+            'ffmpeg', '-i',
+            '"%s"' % tmp_filename, '-ss',
+            str(start_time), '-t',
+            str(end_time - start_time), '-c:v', 'libx264', '-c:a', 'copy',
+            '-threads', '1', '-loglevel', 'panic',
+            '"%s"' % output_filename
+        ]
+        command = ' '.join(command)
+        try:
+            subprocess.check_output(
+                command, shell=True, stderr=subprocess.STDOUT)
+        except subprocess.CalledProcessError:
+            return status, 'Trimming Failed'
+
+    # Check if the video was successfully saved.
+    status = os.path.exists(output_filename)
+    os.remove(tmp_filename)
+    return status, 'Downloaded'
+
+
+def download_clip_wrapper(item, trim_format, tmp_dir, output_dir):
+    """Wrapper for parallel processing purposes."""
+    output_filename = construct_video_filename(item, trim_format, output_dir)
+    clip_id = os.path.basename(output_filename).split('.mp4')[0]
+    if os.path.exists(output_filename):
+        status = tuple([clip_id, True, 'Exists'])
+        return status
+
+    youtube_id, start_time, end_time = item
+    downloaded, log = download_clip(
+        youtube_id, output_filename, start_time, end_time, tmp_dir=tmp_dir)
+
+    status = tuple([clip_id, downloaded, log])
+    return status
+
+
+def parse_hvu_annotations(input_csv):
+    """Returns a parsed list of HVU annotations.
+
+    arguments:
+    ---------
+    input_csv: str
+        Path to CSV file containing the following columns:
+        'Tags, youtube_id, time_start, time_end'
+
+    returns:
+    -------
+    dataset: List of tuples. Each tuple consists of
+        (youtube_id, time_start, time_end). The type of time is float.
+    """
+    lines = open(input_csv).readlines()
+    lines = [x.strip().split(',')[1:] for x in lines[1:]]
+
+    lines = [(x[0], float(x[1]), float(x[2])) for x in lines]
+
+    return lines
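+
+
+# Illustrative example (hypothetical data row): for a CSV line such as
+#   'action|eating,abcdefghijk,1.0,11.0'
+# parse_hvu_annotations yields ('abcdefghijk', 1.0, 11.0); the tag column is
+# dropped here because the downloader only needs the id and the trim times.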
+
+
+def main(input_csv,
+         output_dir,
+         trim_format='%06d',
+         num_jobs=24,
+         tmp_dir='/tmp/hvu'):
+    # Reading and parsing HVU.
+    dataset = parse_hvu_annotations(input_csv)
+
+    # Creates folders where videos will be saved later.
+    create_video_folders(dataset, output_dir, tmp_dir)
+
+    # Download all clips.
+    if num_jobs == 1:
+        status_lst = []
+        for item in dataset:
+            status_lst.append(
+                download_clip_wrapper(item, trim_format, tmp_dir, output_dir))
+    else:
+        status_lst = Parallel(n_jobs=num_jobs)(
+            delayed(download_clip_wrapper)(item, trim_format, tmp_dir,
+                                           output_dir) for item in dataset)
+
+    # Clean tmp dir.
+    shutil.rmtree(tmp_dir)
+    # Save download report.
+    mmcv.dump(status_lst, 'download_report.json')
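+
+
+# Example invocation (paths as used in download_videos.sh; job count is
+# illustrative):
+#   python download.py ../../../data/hvu/annotations/hvu_train.csv \
+#       ../../../data/hvu/videos_train -n 16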
+if __name__ == '__main__':
+    description = 'Helper script for downloading and trimming HVU videos.'
+    p = argparse.ArgumentParser(description=description)
+    p.add_argument(
+        'input_csv',
+        type=str,
+        help=('CSV file containing the following format: '
+              'Tags, youtube_id, time_start, time_end'))
+    p.add_argument(
+        'output_dir',
+        type=str,
+        help='Output directory where videos will be saved.')
+    p.add_argument(
+        '-f',
+        '--trim-format',
+        type=str,
+        default='%06d',
+        # '%%' keeps argparse help expansion from choking on a literal '%'.
+        help=('Format of the filename of trimmed videos: '
+              'videoid_%%0xd(start_time)_%%0xd(end_time).mp4. '
+              'Note that start_time and end_time are multiplied by 10, '
+              'since the timestamps keep one decimal place.'))
+    p.add_argument('-n', '--num-jobs', type=int, default=24)
+    p.add_argument('-t', '--tmp-dir', type=str, default='/tmp/hvu')
+    main(**vars(p.parse_args()))
diff --git a/tools/data/hvu/download_annotations.sh b/tools/data/hvu/download_annotations.sh
new file mode 100644
index 0000000000..d100a47598
--- /dev/null
+++ b/tools/data/hvu/download_annotations.sh
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+
+set -e
+
+DATA_DIR="../../../data/hvu/annotations"
+
+if [[ ! -d "${DATA_DIR}" ]]; then
+  echo "${DATA_DIR} does not exist. Creating";
+  mkdir -p ${DATA_DIR}
+fi
+
+git clone https://github.com/holistic-video-understanding/HVU-Dataset.git
+
+cd HVU-Dataset
+unzip -o HVU_Train_V1.0.zip
+unzip -o HVU_Val_V1.0.zip
+cd ..
+
+mv HVU-Dataset/HVU_Train_V1.0.csv ${DATA_DIR}/hvu_train.csv
+mv HVU-Dataset/HVU_Val_V1.0.csv ${DATA_DIR}/hvu_val.csv
+mv HVU-Dataset/HVU_Tags_Categories_V1.0.csv ${DATA_DIR}/hvu_categories.csv
+
+rm -rf HVU-Dataset
diff --git a/tools/data/hvu/download_videos.sh b/tools/data/hvu/download_videos.sh
new file mode 100644
index 0000000000..eca14cd812
--- /dev/null
+++ b/tools/data/hvu/download_videos.sh
@@ -0,0 +1,14 @@
+#!/usr/bin/env bash
+
+# set up environment
+conda env create -f environment.yml
+source activate hvu
+pip install --upgrade youtube-dl
+
+DATA_DIR="../../../data/hvu"
+ANNO_DIR="../../../data/hvu/annotations"
+python download.py ${ANNO_DIR}/hvu_train.csv ${DATA_DIR}/videos_train
+python download.py ${ANNO_DIR}/hvu_val.csv ${DATA_DIR}/videos_val
+
+source deactivate
+conda remove -n hvu --all
diff --git a/tools/data/hvu/environment.yml b/tools/data/hvu/environment.yml
new file mode 100644
index 0000000000..86e7e1a24c
--- /dev/null
+++ b/tools/data/hvu/environment.yml
@@ -0,0 +1,36 @@
+name: hvu
+channels:
+  - anaconda
+  - menpo
+  - conda-forge
+  - defaults
+dependencies:
+  - ca-certificates=2020.1.1
+  - certifi=2020.4.5.1
+  - ffmpeg=2.8.6
+  - libcxx=10.0.0
+  - libedit=3.1.20181209
+  - libffi=3.3
+  - ncurses=6.2
+  - openssl=1.1.1g
+  - pip=20.0.2
+  - python=3.7.7
+  - readline=8.0
+  - setuptools=46.4.0
+  - sqlite=3.31.1
+  - tk=8.6.8
+  - wheel=0.34.2
+  - xz=5.2.5
+  - zlib=1.2.11
+  - pip:
+    - decorator==4.4.2
+    - intel-openmp==2019.0
+    - joblib==0.15.1
+    - mkl==2019.0
+    - numpy==1.18.4
+    - olefile==0.46
+    - pandas==1.0.3
+    - python-dateutil==2.8.1
+    - pytz==2020.1
+    - six==1.14.0
+    - youtube-dl==2020.5.8
diff --git a/tools/data/hvu/extract_frames.sh b/tools/data/hvu/extract_frames.sh
new file mode 100644
index 0000000000..d50f1cf87b
--- /dev/null
+++ b/tools/data/hvu/extract_frames.sh
@@ -0,0 +1,10 @@
+#!/usr/bin/env bash
+
+cd ../
+python build_rawframes.py ../../data/hvu/videos_train/ ../../data/hvu/rawframes_train/ --level 1 --flow-type tvl1 --ext mp4 --task both --new-short 256
+echo "Raw frames (RGB and TV-L1 flow) generated for train set"
+
+python build_rawframes.py ../../data/hvu/videos_val/ ../../data/hvu/rawframes_val/ --level 1 --flow-type tvl1 --ext mp4 --task both --new-short 256
+echo "Raw frames (RGB and TV-L1 flow) generated for val set"
+
+cd hvu/
diff --git a/tools/data/hvu/generate_file_list.py b/tools/data/hvu/generate_file_list.py
new file mode 100644
index 0000000000..e76706a7ec
--- /dev/null
+++ b/tools/data/hvu/generate_file_list.py
@@ -0,0 +1,151 @@
+import argparse
+import fnmatch
+import glob
+import os
+import os.path as osp
+
+import mmcv
+
+annotation_root = '../../data/hvu/annotations'
+tag_file = 'hvu_tags.json'
+args = None
+
+
+def parse_directory(path,
+                    rgb_prefix='img_',
+                    flow_x_prefix='flow_x_',
+                    flow_y_prefix='flow_y_',
+                    level=1):
+    """Parse directories holding extracted frames from standard benchmarks.
+
+    Args:
+        path (str): Directory path to parse frames.
+        rgb_prefix (str): Prefix of generated rgb frames name.
+            default: 'img_'.
+        flow_x_prefix (str): Prefix of generated flow x name.
+            default: `flow_x_`.
+        flow_y_prefix (str): Prefix of generated flow y name.
+            default: `flow_y_`.
+        level (int): Directory level for glob searching. Options are 1 and 2.
+            default: 1.
+
+    Returns:
+        dict: frame info dict with video id as key and tuple(path(str),
+            rgb_num(int), flow_x_num(int)) as value.
+    """
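+    # Illustrative example: a frame folder 'clipA' holding img_00001.jpg,
+    # img_00002.jpg, flow_x_00001.jpg and flow_y_00001.jpg is recorded as
+    # {'clipA': ('<path>/clipA', 2, 1)}, i.e. (path, rgb_num, flow_x_num).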
+ """ + print(f'parse frames under directory {path}') + if level == 1: + # Only search for one-level directory + def locate_directory(x): + return osp.basename(x) + + frame_dirs = glob.glob(osp.join(path, '*')) + + elif level == 2: + # search for two-level directory + def locate_directory(x): + return osp.join(osp.basename(osp.dirname(x)), osp.basename(x)) + + frame_dirs = glob.glob(osp.join(path, '*', '*')) + + else: + raise ValueError('level can be only 1 or 2') + + def count_files(directory, prefix_list): + """Count file number with a given directory and prefix. + + Args: + directory (str): Data directory to be search. + prefix_list (list): List or prefix. + + Returns: + list (int): Number list of the file with the prefix. + """ + lst = os.listdir(directory) + cnt_list = [len(fnmatch.filter(lst, x + '*')) for x in prefix_list] + return cnt_list + + # check RGB + frame_dict = {} + for i, frame_dir in enumerate(frame_dirs): + total_num = count_files(frame_dir, + (rgb_prefix, flow_x_prefix, flow_y_prefix)) + dir_name = locate_directory(frame_dir) + + num_x = total_num[1] + num_y = total_num[2] + if num_x != num_y: + raise ValueError(f'x and y direction have different number ' + f'of flow images in video directory: {frame_dir}') + if i % 200 == 0: + print(f'{i} videos parsed') + + frame_dict[dir_name] = (frame_dir, total_num[0], num_x) + + print('frame directory analysis done') + return frame_dict + + +def parse_args(): + parser = argparse.ArgumentParser(description='build file list for HVU') + parser.add_argument('--input_csv', type=str, help='path of input csv file') + parser.add_argument( + '--src_dir', type=str, help='source video / frames directory') + parser.add_argument( + '--output', + type=str, + help='output filename, should \ + ends with .json') + parser.add_argument( + '--mode', + type=str, + choices=['frames', 'videos'], + help='generate file list for frames or videos') + + args = parser.parse_args() + return args + + +if __name__ == '__main__': + args = parse_args() + tag_cates = mmcv.load(tag_file) + tag2category = {} + for k in tag_cates: + for tag in tag_cates[k]: + tag2category[tag] = k + + data_list = open(args.input_csv).readlines() + data_list = [x.strip().split(',') for x in data_list[1:]] + + if args.mode == 'videos': + downloaded = os.listdir(args.src_dir) + downloaded = [x.split('.')[0] for x in downloaded] + downloaded_set = set(downloaded) + else: + parse_result = parse_directory(args.src_dir) + downloaded_set = set(parse_result) + + def parse_line(line): + tags, youtube_id, start, end = line + start, end = int(float(start) * 10), int(float(end) * 10) + newname = f'{youtube_id}_{start:06d}_{end:06d}' + tags = tags.split('|') + all_tags = {} + for tag in tags: + category = tag2category[tag] + all_tags.setdefault(category, + []).append(tag_cates[category].index(tag)) + return newname, all_tags + + data_list = [parse_line(line) for line in data_list] + data_list = [line for line in data_list if line[0] in downloaded_set] + + if args.mode == 'frames': + result = [ + dict( + frame_dir=k[0], total_frames=parse_result[k[0]][1], label=k[1]) + for k in data_list + ] + elif args.mode == 'videos': + result = [dict(filename=k[0] + '.mp4', label=k[1]) for k in data_list] + mmcv.dump(result, args.output) diff --git a/tools/data/hvu/generate_rawframes_filelist.sh b/tools/data/hvu/generate_rawframes_filelist.sh new file mode 100644 index 0000000000..59f3fa18bf --- /dev/null +++ b/tools/data/hvu/generate_rawframes_filelist.sh @@ -0,0 +1,5 @@ +# to generate file list of frames 
+    mmcv.dump(result, args.output)
diff --git a/tools/data/hvu/generate_rawframes_filelist.sh b/tools/data/hvu/generate_rawframes_filelist.sh
new file mode 100644
index 0000000000..59f3fa18bf
--- /dev/null
+++ b/tools/data/hvu/generate_rawframes_filelist.sh
@@ -0,0 +1,5 @@
+# to generate file list of frames
+python generate_file_list.py --input_csv ../../../data/hvu/annotations/hvu_train.csv --src_dir ../../../data/hvu/rawframes_train \
+  --output ../../../data/hvu/hvu_train.json --mode frames
+python generate_file_list.py --input_csv ../../../data/hvu/annotations/hvu_val.csv --src_dir ../../../data/hvu/rawframes_val \
+  --output ../../../data/hvu/hvu_val.json --mode frames
diff --git a/tools/data/hvu/generate_videos_filelist.sh b/tools/data/hvu/generate_videos_filelist.sh
new file mode 100644
index 0000000000..deba7b74d8
--- /dev/null
+++ b/tools/data/hvu/generate_videos_filelist.sh
@@ -0,0 +1,5 @@
+# to generate file lists of videos
+python generate_file_list.py --input_csv ../../../data/hvu/annotations/hvu_train.csv --src_dir ../../../data/hvu/videos_train \
+  --output ../../../data/hvu/hvu_train_video.json --mode videos
+python generate_file_list.py --input_csv ../../../data/hvu/annotations/hvu_val.csv --src_dir ../../../data/hvu/videos_val \
+  --output ../../../data/hvu/hvu_val_video.json --mode videos
diff --git a/tools/data/hvu/parse_tag_list.py b/tools/data/hvu/parse_tag_list.py
new file mode 100644
index 0000000000..41e8db1951
--- /dev/null
+++ b/tools/data/hvu/parse_tag_list.py
@@ -0,0 +1,15 @@
+import mmcv
+
+tag_list = '../../../data/hvu/annotations/hvu_categories.csv'
+
+lines = open(tag_list).readlines()
+lines = [x.strip().split(',') for x in lines[1:]]
+tag_categories = {}
+for line in lines:
+    tag, category = line
+    tag_categories.setdefault(category, []).append(tag)
+
+for k in tag_categories:
+    tag_categories[k].sort()
+
+mmcv.dump(tag_categories, 'hvu_tags.json')
diff --git a/tools/data/hvu/preparing_hvu.md b/tools/data/hvu/preparing_hvu.md
new file mode 100644
index 0000000000..49a437b6a5
--- /dev/null
+++ b/tools/data/hvu/preparing_hvu.md
@@ -0,0 +1,83 @@
+# Preparing HVU
+
+For basic dataset information, please refer to the official [project](https://github.com/holistic-video-understanding/HVU-Dataset/) and the [paper](https://arxiv.org/abs/1904.11451).
+Before we start, please make sure that the directory is located at `$MMACTION2/tools/data/hvu/`.
+
+## Step 1. Prepare Annotations
+
+First of all, you can run the following script to prepare annotations.
+
+```shell
+bash download_annotations.sh
+```
+
+Besides, you need to run the following command to parse the tag list of HVU.
+
+```shell
+python parse_tag_list.py
+```
+
+## Step 2. Prepare Videos
+
+Then, you can run the following script to prepare videos.
+The codes are adapted from the [official crawler](https://github.com/activitynet/ActivityNet/tree/master/Crawler/Kinetics). Note that this might take a long time.
+
+```shell
+bash download_videos.sh
+```
+
+## Step 3. Extract RGB and Flow
+
+This part is **optional** if you only want to use the video loader.
+
+Before extracting, please refer to [install.md](/docs/install.md) for installing [denseflow](https://github.com/open-mmlab/denseflow).
+
+You can use the following script to extract both RGB and Flow frames.
+
+```shell
+bash extract_frames.sh
+```
+
+By default, we generate frames with the short edge resized to 256.
+More details can be found in [data_preparation](/docs/data_preparation.md).
+
+## Step 4. Generate File List
+
+You can run the following scripts to generate file lists in the video and rawframe formats, respectively; a sketch of the resulting entries is shown below.
+
+```shell
+bash generate_videos_filelist.sh
+# execute the command below when rawframes are ready
+bash generate_rawframes_filelist.sh
+```
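+
+Each entry of the generated file list is a dict whose label maps tag categories to lists of tag indices. Below is a minimal sketch for inspecting one entry; the label values shown are hypothetical, and `mmcv.load` simply reads the JSON list.
+
+```python
+import mmcv
+
+# Load the generated video file list and peek at the first entry.
+video_list = mmcv.load('../../../data/hvu/hvu_train_video.json')
+# A typical entry looks like (each index points into the per-category
+# tag list stored in hvu_tags.json):
+#   {'filename': 'OLpWTpTC4P8_000570_000670.mp4',
+#    'label': {'action': [3], 'object': [15, 42]}}
+print(video_list[0])
+```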
+
+## Step 5. Folder Structure
+
+After finishing the whole data pipeline for HVU preparation,
+you can get the rawframes (RGB + Flow), videos and annotation files for HVU.
+
+In the context of the whole project (for HVU only), the full folder structure will look like:
+
+```
+mmaction2
+├── mmaction
+├── tools
+├── configs
+├── data
+│   ├── hvu
+│   │   ├── hvu_train_video.json
+│   │   ├── hvu_val_video.json
+│   │   ├── hvu_train.json
+│   │   ├── hvu_val.json
+│   │   ├── annotations
+│   │   ├── videos_train
+│   │   │   ├── OLpWTpTC4P8_000570_000670.mp4
│   │   │   ├── xsPKW4tZZBc_002330_002430.mp4
+│   │   │   ├── ...
+│   │   ├── videos_val
+│   │   ├── rawframes_train
+│   │   ├── rawframes_val
+```
+
+For training and evaluating on HVU, please refer to [getting_started](/docs/getting_started.md).
diff --git a/tools/data/kinetics400/download.py b/tools/data/kinetics400/download.py
index 417fc96703..d1281ee79d 100755
--- a/tools/data/kinetics400/download.py
+++ b/tools/data/kinetics400/download.py
@@ -1,19 +1,19 @@
-# This scripts is copied from
-# https://github.com/activitynet/ActivityNet/blob/master/Crawler/Kinetics/download.py  # noqa: E501
-# The code is licensed under the MIT licence.
+# ------------------------------------------------------------------------------
+# Adapted from https://github.com/activitynet/ActivityNet/
+# Original licence: Copyright (c) Microsoft, under the MIT License.
+# ------------------------------------------------------------------------------
 import argparse
 import glob
 import json
 import os
 import shutil
+import ssl
 import subprocess
+import uuid
 from collections import OrderedDict

-import ssl  # isort:skip
-import uuid  # isort:skip
-
-import pandas as pd  # isort:skip
-from joblib import Parallel, delayed  # isort:skip
+import pandas as pd
+from joblib import Parallel, delayed

 ssl._create_default_https_context = ssl._create_unverified_context